# NOTE: this file was captured from a web file viewer (Spaces status: Sleeping;
# file size: 2,628 bytes). The viewer's commit-hash and line-number gutters
# were page residue, not program text, and are omitted here.
# import streamlit as st
# from langchain.text_splitter import CharacterTextSplitter
# from langchain_community.document_loaders import TextLoader
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
# from langchain.chains import RetrievalQA
# from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
# def get_text():
# input_text = st.text_input("You: ", key="input")
# return input_text
# user_input = get_text()
# submit = st.button('Get Answer')
# loader = TextLoader('India.txt')
# documents =loader.load()
# text_splitter = CharacterTextSplitter (chunk_size=200,
# chunk_overlap=0)
# texts= text_splitter.split_documents(documents)
# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# db = Chroma.from_documents(texts, embeddings)
# db._collection.get(include=['embeddings'])
# retriever = db.as_retriever(search_kwargs={"k": 1})
# if user_input and submit:
# docs = retriever.get_relevant_documents(user_input)
# st.write("Answer")
# document = docs[0]
# page_content = document.page_content
# st.write(page_content)
# # st.text(file_content)
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.vectorstores import Chroma
from langchain.embeddings.base import Embeddings
import numpy as np
class CharLevelEmbeddings(Embeddings):
    """Toy character-level embedding model.

    Each text is mapped to a fixed-length histogram of its character codes
    (bucketed with ``ord(char) % dimension``) and L2-normalised. Every
    document and every query therefore yields a vector of the same length,
    which a vector store needs in order to compare them.

    This is only a placeholder: in practice, use a more sophisticated method
    or a pretrained model.

    Args:
        dimension: Length of every embedding vector. Must be positive.

    Raises:
        ValueError: If ``dimension`` is not a positive integer.
    """

    def __init__(self, dimension=256):
        if dimension <= 0:
            raise ValueError("dimension must be a positive integer")
        self.dimension = dimension

    def embed_documents(self, texts):
        """Return one embedding (a list of floats) per text in ``texts``."""
        return [self.embed_text(text) for text in texts]

    def embed_text(self, text):
        """Embed a single string as a normalised character histogram.

        Returns:
            A list of ``self.dimension`` floats. An empty string maps to the
            all-zero vector.
        """
        buckets = np.fromiter(
            (ord(char) % self.dimension for char in text),
            dtype=np.int64,
            count=len(text),
        )
        histogram = np.bincount(buckets, minlength=self.dimension).astype(float)
        norm = np.linalg.norm(histogram)
        if norm > 0:
            # Normalise so that text length does not dominate the distance.
            histogram /= norm
        # Plain Python floats rather than a NumPy array.
        return histogram.tolist()

    def embed_query(self, text):
        """Embed a query string the same way documents are embedded."""
        return self.embed_text(text)
def get_text():
    """Render the question box and hand back its current value.

    Returns:
        Whatever ``st.text_input`` returns for the widget labelled "You: "
        (registered under the widget key "input").
    """
    return st.text_input("You: ", key="input")
# --- UI: question box and submit button ---------------------------------
user_input = get_text()
submit = st.button('Get Answer')

# --- Index: load the source text, split it into chunks, embed them ------
# NOTE(review): this runs at module top level, so the index is rebuilt each
# time the script executes; consider caching it if start-up becomes slow.
loader = TextLoader('India.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = CharLevelEmbeddings()
db = Chroma.from_documents(texts, embeddings)

# Only the single closest chunk is requested for each question.
retriever = db.as_retriever(search_kwargs={"k": 1})

# --- Answer: show the best-matching chunk once a question is submitted ---
if user_input and submit:
    docs = retriever.get_relevant_documents(user_input)
    st.write("Answer")
    if docs:
        st.write(docs[0].page_content)
    else:
        # The retriever can return an empty list (e.g. nothing was indexed);
        # indexing docs[0] unguarded would raise IndexError.
        st.write("No matching passage found.")
# End of file.