# Earlier version (kept for reference): the same retrieval pipeline, but using
# SentenceTransformerEmbeddings instead of the custom character-level embeddings below.
# import streamlit as st
# from langchain.text_splitter import CharacterTextSplitter
# from langchain_community.document_loaders import TextLoader
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
# from langchain.chains import RetrievalQA
# from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
#
# def get_text():
#     input_text = st.text_input("You: ", key="input")
#     return input_text
#
# user_input = get_text()
# submit = st.button('Get Answer')
#
# loader = TextLoader('India.txt')
# documents = loader.load()
# text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
# texts = text_splitter.split_documents(documents)
#
# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# db = Chroma.from_documents(texts, embeddings)
# db._collection.get(include=['embeddings'])
# retriever = db.as_retriever(search_kwargs={"k": 1})
#
# if user_input and submit:
#     docs = retriever.get_relevant_documents(user_input)
#     st.write("Answer")
#     document = docs[0]
#     page_content = document.page_content
#     st.write(page_content)
# # st.text(file_content)

import streamlit as st
import numpy as np
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings.base import Embeddings


class CharLevelEmbeddings(Embeddings):
    """Toy character-level embeddings: each text becomes a fixed-length vector of
    character code points. This is only a demonstration; in practice, use a
    pretrained model such as SentenceTransformerEmbeddings."""

    def __init__(self, dim=256):
        # Chroma requires every embedding to have the same dimensionality,
        # so each text is truncated or zero-padded to `dim` values.
        self.dim = dim

    def embed_documents(self, texts):
        return [self.embed_text(text) for text in texts]

    def embed_text(self, text):
        # Convert characters to their code points, then pad/truncate to a fixed
        # length and return a plain list of floats, as Chroma expects.
        codes = np.zeros(self.dim, dtype=float)
        values = [float(ord(char)) for char in text[: self.dim]]
        codes[: len(values)] = values
        return codes.tolist()

    def embed_query(self, text):
        return self.embed_text(text)


def get_text():
    input_text = st.text_input("You: ", key="input")
    return input_text


user_input = get_text()
submit = st.button('Get Answer')

# Load the source document and split it into 200-character chunks.
loader = TextLoader('India.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Embed the chunks and index them in an in-memory Chroma collection.
embeddings = CharLevelEmbeddings()
db = Chroma.from_documents(texts, embeddings)
db._collection.get(include=['embeddings'])  # optional: inspect stored embeddings (debug only)

# Retrieve only the single most similar chunk.
retriever = db.as_retriever(search_kwargs={"k": 1})

if user_input and submit:
    docs = retriever.get_relevant_documents(user_input)
    st.write("Answer")
    if docs:
        document = docs[0]
        page_content = document.page_content
        st.write(page_content)
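
# Usage sketch (assumptions: the filename app.py is illustrative, and 'India.txt'
# is expected to sit in the working directory alongside the script):
#
#   pip install streamlit langchain langchain-community chromadb numpy
#   streamlit run app.py
#
# Type a question in the text box and click "Get Answer"; the app displays the
# text of the single most similar 200-character chunk retrieved from India.txt.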