JBHF committed on
Commit
1f5fbd7
1 Parent(s): 67d9bfa

Create app-12-04-2024-19u45m-CET.py

Files changed (1)
  1. app-12-04-2024-19u45m-CET.py +168 -0
app-12-04-2024-19u45m-CET.py ADDED
@@ -0,0 +1,168 @@
# app.py-12-04-2024-19u45m-CET.py

import os
from typing import List

# from langchain.embeddings.openai import OpenAIEmbeddings  # ORIGINAL
from langchain_community.embeddings import FastEmbedEmbeddings  # JB

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import (
    ConversationalRetrievalChain,
)
from langchain.document_loaders import PyPDFLoader
# from langchain.chat_models import ChatOpenAI  # ORIGINAL
from langchain_groq import ChatGroq  # JB

from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.docstore.document import Document
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from chainlit.types import AskFileResponse

import chainlit as cl

# JB: load the Groq API key from .env; never hard-code it in the source.
from dotenv import load_dotenv
load_dotenv()
groq_api_key = os.environ['GROQ_API_KEY']
print("GROQ_API_KEY loaded:", bool(groq_api_key))  # confirm presence without echoing the secret

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

system_template = """Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.

If the user greets you with greetings like "Hi", "Hello", or "How are you", reply accordingly as well.

An example of your response should be:

The answer is foo
SOURCES: xyz


Begin!
----------------
{summaries}"""
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
chain_type_kwargs = {"prompt": prompt}
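# Note: chain_type_kwargs is defined here but never passed to the chain created
# below, so the chain runs with its default question-answering prompt. To apply
# this custom prompt, one would pass combine_docs_chain_kwargs=chain_type_kwargs
# to ConversationalRetrievalChain.from_llm (whose "stuff" chain expects a
# {context} placeholder rather than {summaries}).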


def process_file(file: AskFileResponse):
    import tempfile

    # Persist the uploaded bytes to a temporary file so PyPDFLoader can read it.
    # (Open in binary mode, and don't shadow the tempfile module itself.)
    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".pdf") as temp_file:
        temp_file.write(file.content)

    pypdf_loader = PyPDFLoader(temp_file.name)
    # load_and_split falls back to a default splitter, so the module-level
    # text_splitter above is currently unused.
    texts = pypdf_loader.load_and_split()
    texts = [text.page_content for text in texts]
    return texts


@cl.on_chat_start
async def on_chat_start():
    files = None

    # Wait for the user to upload a file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
    await msg.send()

    # Load the file
    texts = process_file(file)

    print(texts[0])

    # Create metadata for each chunk
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store
    # embeddings = OpenAIEmbeddings()  # ORIGINAL
    embeddings = FastEmbedEmbeddings()  # JB: instantiate the class, not the bare class object
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    message_history = ChatMessageHistory()

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # JB
    # llm = ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768')

    # Create a chain that uses the Chroma vector store
    chain = ConversationalRetrievalChain.from_llm(
        # ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),  # ORIGINAL
        ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768', streaming=True),  # JB
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message):
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    text_elements = []  # type: List[cl.Text]

    if source_documents:
        for source_idx, source_doc in enumerate(source_documents):
            source_name = f"source_{source_idx}"
            # Create the text element referenced in the message
            text_elements.append(
                cl.Text(content=source_doc.page_content, name=source_name)
            )
        source_names = [text_el.name for text_el in text_elements]

        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
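
A minimal sketch for sanity-checking the swapped-in pieces (FastEmbedEmbeddings and ChatGroq) before launching the app. It assumes the same package versions as the imports above and a GROQ_API_KEY in .env; the file name smoke_test.py is hypothetical and this is not part of the committed file:

# smoke_test.py -- hypothetical helper, not part of the app itself
import os

from dotenv import load_dotenv
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain_groq import ChatGroq

load_dotenv()

# FastEmbed runs locally and needs no API key.
emb = FastEmbedEmbeddings()
print("embedding dimension:", len(emb.embed_query("hello world")))

# ChatGroq requires GROQ_API_KEY in the environment.
llm = ChatGroq(
    temperature=0.2,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="mixtral-8x7b-32768",
)
print(llm.invoke("Say hi in one word.").content)

If both prints succeed, the app can be started with: chainlit run app-12-04-2024-19u45m-CET.py -w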