JBHF committed on
Commit
2cecfe5
1 Parent(s): 1f5fbd7

Update app-12-04-2024-19u45m-CET.py

Files changed (1)
  1. app-12-04-2024-19u45m-CET.py +47 -94
app-12-04-2024-19u45m-CET.py CHANGED
@@ -1,20 +1,20 @@
  # app.py-12-04-2024-19u45m-CET.py


  import os
  from typing import List
-
- # from langchain.embeddings.openai import OpenAIEmbeddings # ORIGINAL
- from langchain_community.embeddings import FastEmbedEmbeddings # JB
-
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.vectorstores import Chroma
- from langchain.chains import (
-     ConversationalRetrievalChain,
- )
  from langchain.document_loaders import PyPDFLoader
- # from langchain.chat_models import ChatOpenAI # ORIGINAL
- from langchain_groq import ChatGroq # JB
-
  from langchain.prompts.chat import (
      ChatPromptTemplate,
      SystemMessagePromptTemplate,
@@ -22,91 +22,60 @@ from langchain.prompts.chat import (
  )
  from langchain.docstore.document import Document
  from langchain.memory import ChatMessageHistory, ConversationBufferMemory
- from chainlit.types import AskFileResponse
-
- import chainlit as cl

- # JB
- from dotenv import load_dotenv
- import glob
- load_dotenv() #
- groq_api_key = os.environ['GROQ_API_KEY']
- # groq_api_key = "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" # os.environ['GROQ_API_KEY']
- print("groq_api_key: ", groq_api_key)

  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

- system_template = """Use the following pieces of context to answer the users question.
  If you don't know the answer, just say that you don't know, don't try to make up an answer.
  ALWAYS return a "SOURCES" part in your answer.
  The "SOURCES" part should be a reference to the source of the document from which you got your answer.
-
  And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
-
  Example of your response should be:
-
  The answer is foo
  SOURCES: xyz
-
-
  Begin!
  ----------------
  {summaries}"""
  messages = [
      SystemMessagePromptTemplate.from_template(system_template),
      HumanMessagePromptTemplate.from_template("{question}"),
  ]
  prompt = ChatPromptTemplate.from_messages(messages)
  chain_type_kwargs = {"prompt": prompt}

- def process_file(file: AskFileResponse):
-     import tempfile
-
-     with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
-         with open(tempfile.name, "wb") as f:
-             f.write(file.content)

-     pypdf_loader = PyPDFLoader(tempfile.name)
      texts = pypdf_loader.load_and_split()
      texts = [text.page_content for text in texts]
      return texts


- @cl.on_chat_start
- async def on_chat_start():
-     files = None

-     # Wait for the user to upload a file
-     while files == None:
-         files = await cl.AskFileMessage(
-             content="Please upload a PDF file to begin!",
-             accept=["application/pdf"],
-             max_size_mb=20,
-             timeout=180,
-         ).send()

      file = files[0]

-     msg = cl.Message(
-         content=f"Processing `{file.name}`...", disable_human_feedback=True
-     )
-     await msg.send()

-     # load the file
      texts = process_file(file)

-     print(texts[0])
-
      # Create a metadata for each chunk
      metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

-     # Create a Chroma vector store
-     # embeddings = OpenAIEmbeddings() # ORIGINAL
-     embeddings = FastEmbedEmbeddings # JB
-     docsearch = await cl.make_async(Chroma.from_texts)(
-         texts, embeddings, metadatas=metadatas
-     )

      message_history = ChatMessageHistory()

@@ -117,52 +86,36 @@ async def on_chat_start():
          return_messages=True,
      )

-
-     # JB
-     # llm = ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768')
-
-
-     # Create a chain that uses the Chroma vector store
      chain = ConversationalRetrievalChain.from_llm(
-         # ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True), # ORIGINAL
-         ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768', streaming=True), # JB
          chain_type="stuff",
          retriever=docsearch.as_retriever(),
          memory=memory,
          return_source_documents=True,
      )

-     # Let the user know that the system is ready
-     msg.content = f"Processing `{file.name}` done. You can now ask questions!"
-     await msg.update()
-
-     cl.user_session.set("chain", chain)


- @cl.on_message
- async def main(message):
-     chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
-     cb = cl.AsyncLangchainCallbackHandler()

-     res = await chain.acall(message.content, callbacks=[cb])
-     answer = res["answer"]
-     source_documents = res["source_documents"]  # type: List[Document]

-     text_elements = []  # type: List[cl.Text]

-     if source_documents:
-         for source_idx, source_doc in enumerate(source_documents):
-             source_name = f"source_{source_idx}"
-             # Create the text element referenced in the message
-             text_elements.append(
-                 cl.Text(content=source_doc.page_content, name=source_name)
-             )
-         source_names = [text_el.name for text_el in text_elements]

-         if source_names:
-             answer += f"\nSources: {', '.join(source_names)}"
-         else:
-             answer += "\nNo sources found"

-     await cl.Message(content=answer, elements=text_elements).send()
 
 
 
  # app.py-12-04-2024-19u45m-CET.py
+ #
+ # POE ChatGPT:
+ # To convert the code from a Chainlit app to a Streamlit app, you'll need to make several modifications.
+ # Here's the modified code for a Streamlit app:

  import os
  from typing import List
+ import streamlit as st
+ from langchain_community.embeddings import FastEmbedEmbeddings
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.vectorstores import Chroma
+ from langchain.chains import ConversationalRetrievalChain
  from langchain.document_loaders import PyPDFLoader
+ from langchain_groq import ChatGroq
  from langchain.prompts.chat import (
      ChatPromptTemplate,
      SystemMessagePromptTemplate,
      HumanMessagePromptTemplate,
  )
  from langchain.docstore.document import Document
  from langchain.memory import ChatMessageHistory, ConversationBufferMemory

+ # Restore the API-key lookup that the conversion dropped; ChatGroq below still needs it.
+ groq_api_key = os.environ['GROQ_API_KEY']
+
+ st.title("Chat App")
+ st.write("Upload a PDF file to begin!")

  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

+ system_template = """Use the following pieces of context to answer the user's question.
  If you don't know the answer, just say that you don't know, don't try to make up an answer.
  ALWAYS return a "SOURCES" part in your answer.
  The "SOURCES" part should be a reference to the source of the document from which you got your answer.
  And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
  Example of your response should be:
  The answer is foo
  SOURCES: xyz
  Begin!
  ----------------
  {summaries}"""
+
  messages = [
      SystemMessagePromptTemplate.from_template(system_template),
      HumanMessagePromptTemplate.from_template("{question}"),
  ]
+
  prompt = ChatPromptTemplate.from_messages(messages)
  chain_type_kwargs = {"prompt": prompt}


+ def process_file(file):
+     # Write the uploaded bytes to disk so PyPDFLoader can open them by path.
+     with open(file.name, "wb") as f:
+         f.write(file.read())

+     pypdf_loader = PyPDFLoader(file.name)
      texts = pypdf_loader.load_and_split()
      texts = [text.page_content for text in texts]
      return texts

+ def main():
+     # st.file_uploader returns a single UploadedFile (or None) unless
+     # accept_multiple_files=True, so there is no list to index into.
+     file = st.file_uploader("Upload PDF File", type="pdf", key="pdf_upload")

+     if not file:
+         return

+     st.write(f"Processing `{file.name}`...")

      texts = process_file(file)

      # Create a metadata for each chunk
      metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

+     embeddings = FastEmbedEmbeddings()
+     docsearch = Chroma.from_texts(texts, embeddings, metadatas=metadatas)

      message_history = ChatMessageHistory()

      memory = ConversationBufferMemory(
          memory_key="chat_history",
          output_key="answer",
          chat_memory=message_history,
          return_messages=True,
      )

      chain = ConversationalRetrievalChain.from_llm(
+         ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768', streaming=True),
          chain_type="stuff",
          retriever=docsearch.as_retriever(),
          memory=memory,
          return_source_documents=True,
      )

+     st.write(f"Processing `{file.name}` done. You can now ask questions!")

+     # Streamlit re-runs the whole script on each interaction, so a single
+     # text_input/button pair replaces the original `while True` loop (re-creating
+     # the same widget inside a loop would raise a DuplicateWidgetID error).
+     user_input = st.text_input("User Input")
+     if st.button("Send"):
+         # ConversationalRetrievalChain has no .call() method; invoke the chain
+         # with its expected "question" input key.
+         res = chain({"question": user_input})
+         answer = res["answer"]
+         source_documents = res["source_documents"]

+         text_elements = []

+         if source_documents:
+             for source_idx, source_doc in enumerate(source_documents):
+                 source_name = f"source_{source_idx}"
+                 # Document takes page_content and metadata, not content/name.
+                 text_elements.append(
+                     Document(page_content=source_doc.page_content,
+                              metadata={"name": source_name})
+                 )
+             source_names = [text_el.metadata["name"] for text_el in text_elements]

+             if source_names:
+                 answer += f"\nSources: {', '.join(source_names)}"
+             else:
+                 answer += "\nNo sources found"

+         st.write(answer)
+         for source_doc in source_documents:
+             st.write(source_doc.page_content)


+ # The new file defines main() but never called it; run the app.
+ main()
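
Note on the conversion: because Streamlit re-runs the whole script on every widget interaction, the code above re-reads the PDF, re-embeds it, and rebuilds the chain each time a question is sent. A minimal sketch of one way to cache the chain across reruns with st.session_state follows; build_chain is a hypothetical helper name, not part of this commit, and the sketch assumes the imports, process_file, and model settings defined above.

def build_chain(file):
    # Hypothetical helper (not in the commit): embed the PDF once, wire up the chain.
    texts = process_file(file)
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
    docsearch = Chroma.from_texts(texts, FastEmbedEmbeddings(), metadatas=metadatas)
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=ChatMessageHistory(),
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        ChatGroq(temperature=0.2, groq_api_key=groq_api_key,
                 model_name='mixtral-8x7b-32768', streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

def main():
    file = st.file_uploader("Upload PDF File", type="pdf", key="pdf_upload")
    if not file:
        return
    # Rebuild the chain only when a new file is uploaded; otherwise reuse the
    # cached one so asking a question does not trigger a full re-embedding.
    if st.session_state.get("chain_for") != file.name:
        st.session_state["chain"] = build_chain(file)
        st.session_state["chain_for"] = file.name
    user_input = st.text_input("User Input")
    if st.button("Send") and user_input:
        res = st.session_state["chain"]({"question": user_input})
        st.write(res["answer"])

main()

As before, GROQ_API_KEY must be set in the environment, and the app is started with `streamlit run app-12-04-2024-19u45m-CET.py`.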