# --- Web-page residue captured together with this file (not part of the program) ---
# Spaces: Sleeping
# File size: 1,432 Bytes
# Blame commits: c1e2c12 50e6b2d 2da293c 50e6b2d 2da293c 50e6b2d c1e2c12 50e6b2d c354300 50e6b2d c354300 28c9957
# (Line-number gutter 1-39 from the page view omitted.)
import streamlit as st
from langchain_community.document_loaders import TextLoader
from langchain_community.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.chains import RetrievalQA
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
class TextLoader:
    """Read an already-open file object and return its contents as text.

    NOTE(review): this class has the same name as the ``TextLoader`` imported
    from ``langchain_community.document_loaders`` at the top of the file, so
    from this point on the name refers to this class, not the imported one.
    """

    def __init__(self, file, encoding="utf-8"):
        """Remember the stream to read from.

        Args:
            file: Any object with a ``read()`` method. Binary streams
                (``read()`` returns bytes) and text streams (``read()``
                returns ``str``) are both accepted.
            encoding: Codec used to decode binary content. Defaults to
                ``"utf-8"``, which was previously the only supported value.
        """
        self.file = file
        self.encoding = encoding

    def load(self):
        """Return everything remaining in the stream as a ``str``.

        The stream is not closed here; whoever opened it stays responsible
        for closing it.

        Raises:
            UnicodeDecodeError: If binary content is not valid in
                ``self.encoding``.
        """
        data = self.file.read()
        # A text-mode stream already yields str; calling .decode() on it
        # would raise AttributeError, so pass it through unchanged.
        if isinstance(data, str):
            return data
        return data.decode(self.encoding)
# Streamlit page: accept a .txt upload, split its text into chunks, and
# display the chunks.
st.title("Please upload files that are txt format")
uploaded_file = st.file_uploader("Choose a text file", type=["txt"])

if uploaded_file is not None:
    # Persist the upload to disk under a fixed name (same side effect as before).
    with open("uploaded_file.txt", "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Read the saved copy back inside a `with` block so the handle is closed
    # deterministically instead of being left open for the rest of the run.
    with open("uploaded_file.txt", "rb") as saved_file:
        text_loader = TextLoader(saved_file)
        documents = text_loader.load()
    # st.write(documents)

    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
    # `documents` is one plain string (TextLoader.load() returns decoded
    # text), not a list of Document objects, so split_documents() cannot be
    # used on it. create_documents() takes raw strings and returns chunked
    # Document objects, which is also what the disabled Chroma step expects.
    texts = text_splitter.create_documents([documents])
    st.write(texts)

    # TODO: retrieval sketch carried over from the original, still disabled.
    # embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    # db = Chroma.from_documents(texts, embeddings)
    # db._collection.get(include=['embeddings'])
    # retriever = db.as_retriever(search_kwargs={"k": 1})
    # docs = retriever.get_relevant_documents("What is the capital of india?")
    # st.write("Answer")
    # st.text(docs)