JBHF committed on
Commit
7ce9490
1 Parent(s): ccba092

CREATED app-ORIGINAL.py - 12-04-2024

Browse files
Files changed (1) hide show
  1. app-ORIGINAL.py +143 -0
app-ORIGINAL.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app-ORIGINAL.py - 12-04-2024
2
+
3
+ import os
4
+ from typing import List
5
+
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.vectorstores import Chroma
9
+ from langchain.chains import (
10
+ ConversationalRetrievalChain,
11
+ )
12
+ from langchain.document_loaders import PyPDFLoader
13
+ from langchain.chat_models import ChatOpenAI
14
+ from langchain.prompts.chat import (
15
+ ChatPromptTemplate,
16
+ SystemMessagePromptTemplate,
17
+ HumanMessagePromptTemplate,
18
+ )
19
+ from langchain.docstore.document import Document
20
+ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
21
+ from chainlit.types import AskFileResponse
22
+
23
+ import chainlit as cl
24
+
25
# Splitter used to chunk document text before embedding (1000-char chunks,
# 100-char overlap) — NOTE(review): not referenced in the visible code; confirm
# it is used elsewhere in the app before removing.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# System prompt: instructs the model to always cite a "SOURCES" section.
# {summaries} is the slot the chain fills with retrieved context chunks.
system_template = """Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.
And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
Example of your response should be:
The answer is foo
SOURCES: xyz
Begin!
----------------
{summaries}"""
# Chat prompt: system instructions above, then the user's {question}.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
# Intended to be passed as chain_type_kwargs= to a chain accepting a custom
# prompt — NOTE(review): not used by the visible ConversationalRetrievalChain
# call; confirm intent.
chain_type_kwargs = {"prompt": prompt}
44
+
45
+
46
def process_file(file: AskFileResponse):
    """Persist an uploaded PDF to a temp file and return its text chunks.

    Args:
        file: Chainlit file-upload response; ``file.content`` holds the
            raw PDF bytes.

    Returns:
        list[str]: ``page_content`` of each chunk produced by
        ``PyPDFLoader.load_and_split()``.
    """
    import tempfile

    # Write the raw bytes once, in binary mode. The original code bound the
    # file handle to the name `tempfile` (shadowing the module) and re-opened
    # the same path a second time in "wb"; a single binary write is simpler
    # and portable. delete=False keeps the file on disk for PyPDFLoader.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(file.content)
        tmp_path = tmp.name

    pypdf_loader = PyPDFLoader(tmp_path)
    texts = pypdf_loader.load_and_split()
    texts = [text.page_content for text in texts]
    return texts
57
+
58
+
59
@cl.on_chat_start
async def on_chat_start():
    """Set up the per-session QA chain when a new chat starts.

    Prompts the user for a PDF, splits it into text chunks, indexes them in
    a Chroma vector store, and stores a ConversationalRetrievalChain in the
    user session under the key "chain" for the message handler to use.
    """
    files = None

    # Re-prompt until the user actually uploads a file (send() returns None
    # on timeout).
    while files is None:  # fixed: was `== None`; identity check is the idiom
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
    await msg.send()

    # Load the file and extract its text chunks.
    # (Removed debug `print(texts[0])`, which raised IndexError when a PDF
    # produced no chunks.)
    texts = process_file(file)

    # Create a metadata record per chunk so answers can cite "SOURCES".
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Build the Chroma vector store without blocking the event loop.
    embeddings = OpenAIEmbeddings()
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    message_history = ChatMessageHistory()

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Chain that retrieves from the Chroma vector store and answers with
    # conversation memory, returning the source documents for citation.
    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)
116
+
117
+
118
@cl.on_message
async def main(message):
    """Answer one user message with the session's retrieval chain.

    Runs the stored ConversationalRetrievalChain on the message text,
    appends a "Sources: ..." line naming the retrieved chunks, and sends
    the answer with one cl.Text element per source chunk.
    """
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    text_elements = []  # type: List[cl.Text]

    if source_documents:
        # One side-panel text element per retrieved chunk, named
        # source_0, source_1, ...
        text_elements = [
            cl.Text(content=doc.page_content, name=f"source_{idx}")
            for idx, doc in enumerate(source_documents)
        ]
        source_names = [el.name for el in text_elements]

        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()