Jawad138 committed on
Commit
a948408
·
1 Parent(s): abb41df

update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -28
app.py CHANGED
@@ -6,10 +6,11 @@ from langchain.llms import Replicate
6
  from langchain.text_splitter import CharacterTextSplitter
7
  from langchain.vectorstores import FAISS
8
  from langchain.memory import ConversationBufferMemory
9
- from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
 
 
10
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
11
  import os
12
- from dotenv import load_dotenv
13
  import tempfile
14
 
15
  load_dotenv()
@@ -29,7 +30,7 @@ def conversation_chat(query, chain, history):
29
  history.append((query, result["answer"]))
30
  return result["answer"]
31
 
32
- def display_chat_history(chain):
33
  reply_container = st.container()
34
  container = st.container()
35
 
@@ -47,27 +48,16 @@ def display_chat_history(chain):
47
  message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
48
  message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")
49
 
50
- def create_conversational_chain(vector_store):
51
  replicate_api_token = "r8_AA3K1fhDykqLa5M74E5V0w5ss1z0P9S3foWJl" # Replace with your actual token
52
  os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
53
 
54
- # Add debugging statements
55
- st.write("Text chunks lengths:", [len(chunk) for chunk in text_chunks])
56
- st.write("Text chunks content:", text_chunks)
57
-
58
- # Create embeddings
59
- st.write("Creating embeddings...")
60
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
61
- model_kwargs={'device': 'cpu'})
62
-
63
- # Add debugging statements
64
- st.write("Embeddings lengths:", [len(emb) for emb in embeddings])
65
- st.write("Embeddings content:", embeddings)
66
-
67
- vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
68
-
69
- st.write("Creating conversation chain...")
70
-
71
  llm = Replicate(
72
  streaming=True,
73
  model="replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
@@ -82,9 +72,8 @@ def create_conversational_chain(vector_store):
82
  memory=memory)
83
  return chain
84
 
85
-
86
-
87
  def main():
 
88
  initialize_session_state()
89
  st.title("Chat With Your Doc")
90
  st.sidebar.title("Document Processing")
@@ -101,7 +90,7 @@ def main():
101
  loader = None
102
  if file_extension == ".pdf":
103
  loader = PyPDFLoader(temp_file_path)
104
- elif file_extension in (".docx", ".doc"):
105
  loader = Docx2txtLoader(temp_file_path)
106
  elif file_extension == ".txt":
107
  loader = TextLoader(temp_file_path)
@@ -113,11 +102,12 @@ def main():
113
  text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
114
  text_chunks = text_splitter.split_documents(text)
115
 
116
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
117
- model_kwargs={'device': 'cpu'})
118
  vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
119
- chain = create_conversational_chain(vector_store)
120
- display_chat_history(chain)
 
 
121
 
122
  if __name__ == "__main__":
123
  main()
 
6
  from langchain.text_splitter import CharacterTextSplitter
7
  from langchain.vectorstores import FAISS
8
  from langchain.memory import ConversationBufferMemory
9
+ from langchain.document_loaders import PyPDFLoader
10
+ from langchain.document_loaders import TextLoader
11
+ from langchain.document_loaders import Docx2txtLoader
12
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
13
  import os
 
14
  import tempfile
15
 
16
  load_dotenv()
 
30
  history.append((query, result["answer"]))
31
  return result["answer"]
32
 
33
+ def display_chat_history():
34
  reply_container = st.container()
35
  container = st.container()
36
 
 
48
  message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
49
  message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")
50
 
51
+ def create_conversational_chain(vector_store, text_chunks, embeddings):
52
  replicate_api_token = "r8_AA3K1fhDykqLa5M74E5V0w5ss1z0P9S3foWJl" # Replace with your actual token
53
  os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
54
 
55
+ print("Length of text_chunks:", len(text_chunks))
56
+ print("Content of text_chunks:", text_chunks)
57
+
58
+ print("Length of embeddings:", len(embeddings))
59
+ print("Content of embeddings:", embeddings)
60
+
 
 
 
 
 
 
 
 
 
 
 
61
  llm = Replicate(
62
  streaming=True,
63
  model="replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
 
72
  memory=memory)
73
  return chain
74
 
 
 
75
  def main():
76
+ load_dotenv()
77
  initialize_session_state()
78
  st.title("Chat With Your Doc")
79
  st.sidebar.title("Document Processing")
 
90
  loader = None
91
  if file_extension == ".pdf":
92
  loader = PyPDFLoader(temp_file_path)
93
+ elif file_extension == ".docx" or file_extension == ".doc":
94
  loader = Docx2txtLoader(temp_file_path)
95
  elif file_extension == ".txt":
96
  loader = TextLoader(temp_file_path)
 
102
  text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
103
  text_chunks = text_splitter.split_documents(text)
104
 
105
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
 
106
  vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
107
+
108
+ chain = create_conversational_chain(vector_store, text_chunks, embeddings)
109
+
110
+ display_chat_history()
111
 
112
  if __name__ == "__main__":
113
  main()