paloma99 committed on
Commit
e190921
·
verified ·
1 Parent(s): 03ba5b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -3
app.py CHANGED
@@ -6,9 +6,33 @@ import theme
6
  theme = theme.Theme()
7
 
8
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
 
11
 
 
 
12
 
13
 
14
 
@@ -29,11 +53,81 @@ image_gradio_app = gr.Interface(
29
 
30
  # Cell 2: Chatbot Model
31
 
32
- def echo(message, history):
33
- return message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  chatbot_gradio_app = gr.ChatInterface(
36
- fn=echo,
37
  title='Green Greta'
38
  )
39
 
 
6
  theme = theme.Theme()
7
 
8
 
9
+ import os
10
+ import sys
11
+ sys.path.append('../..')
12
 
13
+ #langchain
14
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
15
+ from langchain.embeddings import HuggingFaceEmbeddings
16
+ from langchain.prompts import PromptTemplate
17
+ from langchain.chains import RetrievalQA
18
+ from langchain.prompts import ChatPromptTemplate
19
+ from langchain.schema import StrOutputParser
20
+ from langchain.schema.runnable import Runnable
21
+ from langchain.schema.runnable.config import RunnableConfig
22
+ from langchain.chains import (
23
+ LLMChain, ConversationalRetrievalChain)
24
+ from langchain.vectorstores import Chroma
25
+ from langchain.memory import ConversationBufferMemory
26
+ from langchain.chains import LLMChain
27
+ from langchain.prompts.prompt import PromptTemplate
28
+ from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate
29
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder
30
+ from langchain.document_loaders import PyPDFDirectoryLoader
31
 
32
+ from langchain_community.llms import HuggingFaceHub
33
 
34
+ from pydantic import BaseModel
35
+ import shutil
36
 
37
 
38
 
 
53
 
54
  # Cell 2: Chatbot Model
55
 
56
+ loader = PyPDFDirectoryLoader('pdfs')
57
+ data=loader.load()
58
+ # split documents
59
+ text_splitter = RecursiveCharacterTextSplitter(
60
+ chunk_size=500,
61
+ chunk_overlap=70,
62
+ length_function=len
63
+ )
64
+ docs = text_splitter.split_documents(data)
65
+ # define embedding
66
+ embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
67
+ # create vector database from data
68
+ persist_directory = 'docs/chroma/'
69
+
70
+ # Remove old database files if any
71
+ shutil.rmtree(persist_directory, ignore_errors=True)
72
+ vectordb = Chroma.from_documents(
73
+ documents=docs,
74
+ embedding=embeddings,
75
+ persist_directory=persist_directory
76
+ )
77
+ # define retriever
78
+ retriever = vectordb.as_retriever(search_type="mmr")
79
+ template = """
80
+ Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
81
+ Use the following pieces of context to answer the question if the question is related with recycling /
82
+ No more than two chunks of context /
83
+ Answer in the same language of the question /
84
+ Always say "thanks for asking!" at the end of the answer /
85
+ If the context is not relevant, please answer the question by using your own knowledge about the topic.
86
+
87
+ context: {context}
88
+ question: {question}
89
+ """
90
+
91
+ # Create the chat prompt templates
92
+ system_prompt = SystemMessagePromptTemplate.from_template(template)
93
+ qa_prompt = ChatPromptTemplate(
94
+ messages=[
95
+ system_prompt,
96
+ MessagesPlaceholder(variable_name="chat_history"),
97
+ HumanMessagePromptTemplate.from_template("{question}")
98
+ ]
99
+ )
100
+ llm = HuggingFaceHub(
101
+ repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
102
+ task="text-generation",
103
+ model_kwargs={
104
+ "max_new_tokens": 1024,
105
+ "top_k": 30,
106
+ "temperature": 0.1,
107
+ "repetition_penalty": 1.03,
108
+ },
109
+ )
110
+
111
+ memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", input_key='question', output_key='answer', return_messages=True)
112
+
113
+ qa_chain = ConversationalRetrievalChain.from_llm(
114
+ llm = llm,
115
+ memory = memory,
116
+ retriever = retriever,
117
+ verbose = True,
118
+ combine_docs_chain_kwargs={'prompt': qa_prompt},
119
+ get_chat_history = lambda h : h,
120
+ rephrase_question = False,
121
+ output_key = 'answer'
122
+ )
123
+
124
+ def chat_interface(question,history):
125
+
126
+ result = qa_chain.invoke({"question": question})
127
+ return result['answer'] # If the result is a string, return it directly
128
 
129
  chatbot_gradio_app = gr.ChatInterface(
130
+ fn=chat_interface,
131
  title='Green Greta'
132
  )
133