RetreivalData / app.py
TarunEnma's picture
Update app.py
6c3331a verified
raw
history blame
2.63 kB
# import streamlit as st
# from langchain.text_splitter import CharacterTextSplitter
# from langchain_community.document_loaders import TextLoader
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
# from langchain.chains import RetrievalQA
# from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
# def get_text():
# input_text = st.text_input("You: ", key="input")
# return input_text
# user_input = get_text()
# submit = st.button('Get Answer')
# loader = TextLoader('India.txt')
# documents =loader.load()
# text_splitter = CharacterTextSplitter (chunk_size=200,
# chunk_overlap=0)
# texts= text_splitter.split_documents(documents)
# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# db = Chroma.from_documents(texts, embeddings)
# db._collection.get(include=['embeddings'])
# retriever = db.as_retriever(search_kwargs={"k": 1})
# if user_input and submit:
# docs = retriever.get_relevant_documents(user_input)
# st.write("Answer")
# document = docs[0]
# page_content = document.page_content
# st.write(page_content)
# # st.text(file_content)
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.vectorstores import Chroma
from langchain.embeddings.base import Embeddings
import numpy as np
class CharLevelEmbeddings(Embeddings):
    """Toy embedding model that encodes text as a fixed-length code-point vector.

    Each text is mapped to exactly ``dim`` floats: the Unicode code point of
    each of its first ``dim`` characters, zero-padded on the right when the
    text is shorter. Documents and queries therefore always share the same
    dimensionality, so their vectors can be stored and compared together.

    This is a placeholder, not a semantic embedding; use a pretrained model
    when retrieval quality matters.
    """

    def __init__(self, dim=256):
        """Create the embedder.

        Args:
            dim: Length of every produced vector; must be a positive integer.

        Raises:
            ValueError: If ``dim`` is not positive.
        """
        if dim <= 0:
            raise ValueError(f"dim must be positive, got {dim}")
        self.dim = dim

    def embed_documents(self, texts):
        """Return one embedding (a list of ``dim`` floats) per item of ``texts``."""
        return [self.embed_text(text) for text in texts]

    def embed_text(self, text):
        """Encode ``text`` as a list of exactly ``self.dim`` floats.

        Characters beyond the first ``self.dim`` are ignored; shorter texts
        are right-padded with ``0.0``.
        """
        # Plain Python floats (not a NumPy int array) so the result is a
        # list of floats regardless of the input length.
        codes = [float(ord(char)) for char in text[: self.dim]]
        codes.extend([0.0] * (self.dim - len(codes)))
        return codes

    def embed_query(self, text):
        """Embed a query string with the same encoding used for documents."""
        return self.embed_text(text)
def get_text():
    """Render the "You: " text input widget and return its current value."""
    return st.text_input("You: ", key="input")
# --- UI: query box and submit button ---------------------------------------
user_input = get_text()
submit = st.button('Get Answer')

# --- Index: load India.txt, split it into chunks, embed them into Chroma ----
loader = TextLoader('India.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = CharLevelEmbeddings()
db = Chroma.from_documents(texts, embeddings)

# k=1: ask the retriever for only the single closest chunk.
retriever = db.as_retriever(search_kwargs={"k": 1})

# --- Answer: only when there is a query AND the button was pressed ----------
if user_input and submit:
    docs = retriever.get_relevant_documents(user_input)
    if docs:
        st.write("Answer")
        st.write(docs[0].page_content)
    else:
        # The retriever can come back empty (e.g. nothing was indexed);
        # report that instead of failing on docs[0].
        st.write("No matching passage found.")