"""Streamlit demo: upload a .txt file, index it in Chroma, run one retrieval query."""

from typing import BinaryIO

import streamlit as st
from langchain_community.document_loaders import TextLoader  # noqa: F401 - shadowed by the local class below
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings  # noqa: F401 - unused in this script
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA  # noqa: F401 - unused in this script
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# Where the uploaded bytes are persisted before being read back.
UPLOAD_PATH = "uploaded_file.txt"
# The single, fixed question that is run against the indexed file.
QUERY = "What is the capital of india?"


class TextLoader:
    """Read a binary file-like object and return its content as UTF-8 text.

    NOTE: this class deliberately keeps the name ``TextLoader`` and therefore
    replaces the ``TextLoader`` imported from ``langchain_community`` above.
    Unlike that loader, ``load()`` returns a plain ``str``, not a list of
    ``Document`` objects.
    """

    def __init__(self, file: BinaryIO) -> None:
        """Store the already-open binary file object to read from."""
        self.file = file

    def load(self) -> str:
        """Return the whole file decoded as UTF-8.

        Raises:
            UnicodeDecodeError: if the bytes are not valid UTF-8.
        """
        return self.file.read().decode("utf-8")


st.title("Please upload files that are txt format")
uploaded_file = st.file_uploader("Choose a text file", type=["txt"])

if uploaded_file is not None:
    # Persist the upload to disk (kept from the original flow).
    with open(UPLOAD_PATH, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Read it back inside a context manager so the handle is always closed.
    try:
        with open(UPLOAD_PATH, "rb") as f:
            text = TextLoader(f).load()
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        st.stop()

    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
    # load() yields a raw string, so build Document chunks with
    # create_documents(); split_documents() expects Document objects and
    # would fail on a str.
    texts = text_splitter.create_documents([text])
    if not texts:
        # Nothing to embed or index.
        st.warning("The uploaded file contains no text to index.")
        st.stop()

    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    # NOTE(review): Streamlit re-runs this script on every interaction, so the
    # index is rebuilt each time; presumably the default in-memory collection
    # may accumulate duplicate chunks across reruns - confirm and cache if so.
    db = Chroma.from_documents(texts, embeddings)

    # Retrieve only the single most similar chunk.
    retriever = db.as_retriever(search_kwargs={"k": 1})
    docs = retriever.get_relevant_documents(QUERY)

    st.write("Answer")
    st.text(docs)