KvrParaskevi committed
Update chatbot.py

chatbot.py CHANGED (+51 -18)
@@ -2,41 +2,74 @@ import os
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationChain
 import langchain.globals
+from langchain.prompts import PromptTemplate
 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
-import streamlit as st
+#import streamlit as st
 
-my_model_id = os.getenv('MODEL_REPO_ID', 'Default Value')
+my_model_id = "KvrParaskevi/Llama-2-7b-Hotel-Booking-Model" #os.getenv('MODEL_REPO_ID', 'Default Value')
 token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
 
-
+template = """<<SYS>>
+You are an AI having a conversation with a human. Below is an instruction that describes a task.
+Write a response that appropriately completes the request.
+Reply with the most helpful and logical answer. During the conversation you need to ask the user
+the following questions to complete the hotel booking task.
+
+1) Where would you like to stay and when?
+2) How many people are staying in the room?
+3) Do you prefer any amenities like breakfast included or gym?
+4) What is your name, your email address and phone number?
+
+Make sure you receive a logical answer from the user for every question to complete the hotel
+booking process.
+<</SYS>>
+
+Previous conversation:
+{chat_history}
+
+Human: {question}
+AI:"""
+
+#@st.cache_resource
 def load_model():
     quantization_config = BitsAndBytesConfig(
         load_in_8bit=True,
         # bnb_4bit_compute_dtype=torch.bfloat16
     )
     tokenizer = AutoTokenizer.from_pretrained(my_model_id)
-    model = AutoModelForCausalLM.from_pretrained(my_model_id, device_map="auto")
+    model = AutoModelForCausalLM.from_pretrained(my_model_id, device_map="auto", quantization_config=quantization_config)
 
     return tokenizer, model
 
-
+#@st.cache_resource
 def load_pipeline():
-
-
-
+    tokenizer, model = load_model()
+    pipe = pipeline("text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_tokens=50, top_k=30, early_stopping=True,
+            temperature=0.1, repetition_penalty=1.03)
+
+    llm = HuggingFacePipeline(pipeline=pipe)
+    return llm
 
-def generate_from_pipeline(text, pipe):
-
+# def generate_from_pipeline(text, pipe):
+#     return pipe(text)
 
-
-
-
+llm = load_pipeline()
+
+def demo_miny_memory():
+    prompt = PromptTemplate.from_template(template)
+    memory = ConversationBufferMemory(memory_key="chat_history", llm=llm)
     return memory
 
-def demo_chain(input_text
-
-
+def demo_chain(input_text):
+    conversation = ConversationChain(
+        llm=llm,
+        verbose=True,
+        memory=demo_miny_memory()
+    )
 
-    chat_reply =
-    return chat_reply
+    chat_reply = conversation(input=input_text)
+    return chat_reply['response']