import streamlit as st import re import os from langchain_chroma import Chroma from langchain_community.document_loaders import WebBaseLoader from langchain_core.output_parsers import StrOutputParser from langchain_text_splitters import RecursiveCharacterTextSplitter from sentence_transformers import SentenceTransformer import bs4 import torch from transformers import pipeline # Define the embedding class class SentenceTransformerEmbedding: def __init__(self, model_name): self.model = SentenceTransformer(model_name) def embed_documents(self, texts): embeddings = self.model.encode(texts, convert_to_tensor=True) if isinstance(embeddings, torch.Tensor): return embeddings.cpu().detach().numpy().tolist() # Convert tensor to list return embeddings def embed_query(self, query): embedding = self.model.encode([query], convert_to_tensor=True) if isinstance(embedding, torch.Tensor): return embedding.cpu().detach().numpy().tolist()[0] # Convert tensor to list return embedding[0] # Streamlit UI setup st.title("🤖 Chatbot with URL-based Document Retrieval") # Sidebar Style with Multicolored Background sidebar_bg_style = """ """ st.markdown(sidebar_bg_style, unsafe_allow_html=True) # Main Content Style with Multicolored Background main_bg_style = """ """ st.markdown(main_bg_style, unsafe_allow_html=True) # Sidebar: Input for URL and API keys st.sidebar.title("Settings") # Input field for entering URL dynamically with placeholder and help text url_input = st.sidebar.text_input("Enter Blog Post URL", placeholder="e.g., https://example.com/blog", help="Paste the full URL of the blog post you want to retrieve data from") # Validate the URL and show a success message when correct if url_input: if re.match(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+", url_input): st.sidebar.markdown('
URL is correctly entered
', unsafe_allow_html=True) else: st.sidebar.markdown('Invalid URL, please enter a valid one
', unsafe_allow_html=True) # Option to use pre-provided API keys use_preprovided_keys = st.sidebar.checkbox("Use pre-provided API keys") # Input fields for API keys with placeholders and helper text if not use_preprovided_keys: api_key_1 = st.sidebar.text_input("Enter LangChain API Key", type="password", placeholder="Enter your LangChain API Key", help="Please enter a valid LangChain API key here") api_key_2 = st.sidebar.text_input("Enter Groq API Key", type="password", placeholder="Enter your Groq API Key", help="Please enter your Groq API key here") else: api_key_1 = "your-preprovided-langchain-api-key" # Replace with your actual pre-provided key api_key_2 = "your-preprovided-groq-api-key" # Replace with your actual pre-provided key st.sidebar.markdown('Using pre-provided API keys
', unsafe_allow_html=True) # Submit button for API keys with a success/warning message if st.sidebar.button("Submit API Keys"): if use_preprovided_keys or (api_key_1 and api_key_2): os.environ["LANGCHAIN_API_KEY"] = api_key_1 os.environ["GROQ_API_KEY"] = api_key_2 st.sidebar.markdown('API keys are set
', unsafe_allow_html=True) else: st.sidebar.markdown('Please fill in both API keys or select the option to use pre-provided keys
', unsafe_allow_html=True) # Marquee effect with bold, stylish text and a LinkedIn link st.markdown(""" """, unsafe_allow_html=True) # Title of the chatbot st.markdown('