Spaces:
Sleeping
Sleeping
yogjoshi14
committed on
Commit
•
7a2d0fe
1
Parent(s):
011d448
tested
Browse files
app.py
CHANGED
@@ -17,7 +17,6 @@ import textract
|
|
17 |
st.set_page_config(page_title="chatbot")
|
18 |
st.title("Chat with Documents")
|
19 |
|
20 |
-
|
21 |
num_of_top_selection = 3
|
22 |
CHUNK_SIZE = 500
|
23 |
CHUNK_OVERLAP = 50
|
@@ -63,8 +62,7 @@ def get_text_from_docx(docx):
|
|
63 |
return text
|
64 |
|
65 |
def get_text_from_text_file(text_file):
|
66 |
-
|
67 |
-
text = file.read()
|
68 |
return text
|
69 |
|
70 |
def get_text_from_other_file(file_path):
|
@@ -83,7 +81,7 @@ def load_documents(docs):
|
|
83 |
elif doc.name.lower().endswith('.docx'):
|
84 |
text += get_text_from_docx(doc)
|
85 |
elif doc.name.lower().endswith(('.txt', '.md')):
|
86 |
-
text += get_text_from_text_file(doc)
|
87 |
else:
|
88 |
# Handle other file types, you can extend this as needed
|
89 |
text += get_text_from_other_file(doc)
|
@@ -128,7 +126,7 @@ def input_fields():
|
|
128 |
# st.text_input("Pinecone environment")
|
129 |
st.session_state.pinecone_index = index_name
|
130 |
# st.text_input("Pinecone index name")
|
131 |
-
st.session_state.source_docs = st.file_uploader(label="Upload Documents",
|
132 |
#
|
133 |
|
134 |
|
@@ -137,7 +135,8 @@ def process_documents():
|
|
137 |
if not st.session_state.pinecone_api_key or not st.session_state.pinecone_env or not st.session_state.pinecone_index or not st.session_state.source_docs:
|
138 |
st.warning(f"Please upload the documents and provide the missing fields.")
|
139 |
else:
|
140 |
-
try:
|
|
|
141 |
# for source_doc in st.session_state.source_docs:
|
142 |
if st.session_state.source_docs:
|
143 |
#
|
@@ -149,8 +148,8 @@ def process_documents():
|
|
149 |
texts = split_documents(documents)
|
150 |
#
|
151 |
st.session_state.retriever = embeddings_on_pinecone(texts)
|
152 |
-
except Exception as e:
|
153 |
-
|
154 |
|
155 |
def boot():
|
156 |
#
|
|
|
17 |
st.set_page_config(page_title="chatbot")
|
18 |
st.title("Chat with Documents")
|
19 |
|
|
|
20 |
num_of_top_selection = 3
|
21 |
CHUNK_SIZE = 500
|
22 |
CHUNK_OVERLAP = 50
|
|
|
62 |
return text
|
63 |
|
64 |
def get_text_from_text_file(text_file):
|
65 |
+
text = text_file.read()
|
|
|
66 |
return text
|
67 |
|
68 |
def get_text_from_other_file(file_path):
|
|
|
81 |
elif doc.name.lower().endswith('.docx'):
|
82 |
text += get_text_from_docx(doc)
|
83 |
elif doc.name.lower().endswith(('.txt', '.md')):
|
84 |
+
text += str(get_text_from_text_file(doc))
|
85 |
else:
|
86 |
# Handle other file types, you can extend this as needed
|
87 |
text += get_text_from_other_file(doc)
|
|
|
126 |
# st.text_input("Pinecone environment")
|
127 |
st.session_state.pinecone_index = index_name
|
128 |
# st.text_input("Pinecone index name")
|
129 |
+
st.session_state.source_docs = st.file_uploader(label="Upload Documents", accept_multiple_files=True)
|
130 |
#
|
131 |
|
132 |
|
|
|
135 |
if not st.session_state.pinecone_api_key or not st.session_state.pinecone_env or not st.session_state.pinecone_index or not st.session_state.source_docs:
|
136 |
st.warning(f"Please upload the documents and provide the missing fields.")
|
137 |
else:
|
138 |
+
# try:
|
139 |
+
if True:
|
140 |
# for source_doc in st.session_state.source_docs:
|
141 |
if st.session_state.source_docs:
|
142 |
#
|
|
|
148 |
texts = split_documents(documents)
|
149 |
#
|
150 |
st.session_state.retriever = embeddings_on_pinecone(texts)
|
151 |
+
# except Exception as e:
|
152 |
+
# st.error(f"An error occurred: {e}")
|
153 |
|
154 |
def boot():
|
155 |
#
|