File size: 2,628 Bytes
b6b2dea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1e2c12
4348f11
42677de
570700c
b6b2dea
 
 
 
 
 
 
 
 
 
 
6c3331a
 
 
50e6b2d
161ffe2
 
 
 
 
399e958
589eae5
 
b6b2dea
589eae5
b6b2dea
 
589eae5
b6b2dea
589eae5
 
 
36cc5e8
161ffe2
 
 
 
 
 
 
6c3331a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# import streamlit as st
# from langchain.text_splitter import CharacterTextSplitter
# from langchain_community.document_loaders import TextLoader
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
# from langchain.chains import RetrievalQA
# from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# def get_text():
#     input_text = st.text_input("You: ", key="input")
#     return input_text


# user_input = get_text()
# submit = st.button('Get Answer')


# loader = TextLoader('India.txt')
# documents =loader.load()

# text_splitter = CharacterTextSplitter (chunk_size=200,
# chunk_overlap=0)

# texts= text_splitter.split_documents(documents)



# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# db = Chroma.from_documents(texts, embeddings)
# db._collection.get(include=['embeddings'])
# retriever = db.as_retriever(search_kwargs={"k": 1})

# if user_input and submit:
    
#     docs = retriever.get_relevant_documents(user_input)
#     st.write("Answer")
#     document = docs[0]
#     page_content = document.page_content

#     st.write(page_content)
# # st.text(file_content)

import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.vectorstores import Chroma
from langchain.embeddings.base import Embeddings
import numpy as np

class CharLevelEmbeddings(Embeddings):
    """Toy character-level embedding with a fixed output dimension.

    Each text becomes a vector of ``dimension`` floats: slot ``i`` holds the
    Unicode code point of the ``i``-th character. Texts shorter than
    ``dimension`` are zero-padded and longer texts are truncated, so every
    document and every query yields a vector of the same length. A vector
    store needs that to compare them; emitting one value per character would
    give each text a different dimensionality.

    This is only a placeholder: it captures no meaning. Use a pretrained
    embedding model for real retrieval quality.
    """

    def __init__(self, dimension: int = 256) -> None:
        """Create the embedder.

        Args:
            dimension: Length of every produced vector. Must be positive.

        Raises:
            ValueError: If ``dimension`` is not a positive integer.
        """
        super().__init__()
        if dimension <= 0:
            raise ValueError("dimension must be a positive integer")
        self.dimension = dimension

    def embed_documents(self, texts):
        """Return one fixed-length vector (list of floats) per input text."""
        return [self.embed_text(text) for text in texts]

    def embed_text(self, text) -> list:
        """Return the fixed-length code-point vector for ``text``.

        Args:
            text: The string to embed; may be empty.

        Returns:
            A list of ``self.dimension`` floats (all zeros for empty text).
        """
        vector = np.zeros(self.dimension, dtype=float)
        # Only the first ``dimension`` characters fit; the rest is dropped.
        head = text[: self.dimension]
        if head:
            vector[: len(head)] = [ord(char) for char in head]
        # Plain Python floats, not a NumPy array, as the interface expects.
        return vector.tolist()

    def embed_query(self, text):
        """Embed a query exactly like a document so the two are comparable."""
        return self.embed_text(text)

def get_text():
    """Show the "You: " text box and return its current contents."""
    return st.text_input("You: ", key="input")

# Input widgets: the question box and the button that triggers a lookup.
user_input = get_text()
submit = st.button('Get Answer')


@st.cache_resource
def _build_retriever(path='India.txt'):
    """Load ``path``, split it into chunks, index them and return a retriever.

    Streamlit re-executes the whole script on every widget interaction.
    Caching this function means the file is read and the vector index is
    built once per process rather than on each rerun (which would repeat the
    work and could add the same chunks to the store again).

    Args:
        path: Text file to index.

    Returns:
        A retriever over the indexed chunks that yields the single best match.
    """
    documents = TextLoader(path).load()
    splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
    chunks = splitter.split_documents(documents)
    store = Chroma.from_documents(chunks, CharLevelEmbeddings())
    return store.as_retriever(search_kwargs={"k": 1})


retriever = _build_retriever()

# Only answer once the user has typed something and pressed the button.
if user_input and submit:
    docs = retriever.get_relevant_documents(user_input)
    st.write("Answer")
    if docs:
        st.write(docs[0].page_content)
    else:
        # The retriever can come back empty (e.g. nothing was indexed);
        # report that instead of failing on docs[0].
        st.write("No matching passage found.")