import streamlit as st
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
# NOTE(review): RetrievalQA is normally exposed from `langchain.chains`;
# confirm this import path resolves with the installed LangChain version.
from langchain_community.chains import RetrievalQA
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings


class TextLoader:
    """Read a binary file-like object and return its content as UTF-8 text.

    NOTE: this class shadows the ``TextLoader`` imported from
    ``langchain_community.document_loaders`` above. Unlike that loader,
    ``load()`` here returns a single ``str``, not a list of documents.
    """

    def __init__(self, file):
        """Store *file*, an object whose ``read()`` returns ``bytes``."""
        self.file = file

    def load(self):
        """Return the whole content of the wrapped file decoded as UTF-8.

        Raises:
            UnicodeDecodeError: if the bytes are not valid UTF-8.
        """
        return self.file.read().decode("utf-8")


st.title("Please upload files that are txt format")

uploaded_file = st.file_uploader("Choose a text file", type=["txt"])

if uploaded_file is not None:
    # Persist the upload to a local file, as the original script did.
    with open("uploaded_file.txt", "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Re-open inside a context manager so the handle is always closed.
    with open("uploaded_file.txt", "rb") as saved_file:
        text_loader = TextLoader(saved_file)
        documents = text_loader.load()
    # st.write(documents)

    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
    # `documents` is a plain string here, so use create_documents(), which
    # takes raw texts and returns Document chunks. split_documents() expects
    # Document objects and fails on a str.
    texts = text_splitter.create_documents([documents])
    st.write(texts)

    # embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    # db = Chroma.from_documents(texts, embeddings)
    # db._collection.get(include=['embeddings'])
    # retriever = db.as_retriever(search_kwargs={"k": 1})
    # docs = retriever.get_relevant_documents("What is the capital of india?")
    # st.write("Answer")
    # st.text(docs)

    # # st.write("File content:")
    # # st.text(file_content)