KvrParaskevi committed
Update chatbot.py

chatbot.py CHANGED (+51 -18)
@@ -2,41 +2,74 @@ import os
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationChain
 import langchain.globals
+from langchain.prompts import PromptTemplate
 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
-import streamlit as st
+#import streamlit as st
 
-my_model_id = os.getenv('MODEL_REPO_ID', 'Default Value')
+my_model_id = "KvrParaskevi/Llama-2-7b-Hotel-Booking-Model" #os.getenv('MODEL_REPO_ID', 'Default Value')
 token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
 
-
+template = """<<SYS>>
+You are an AI having a conversation with a human. Below is an instruction that describes a task.
+Write a response that appropriately completes the request.
+Reply with the most helpful and logical answer. During the conversation you need to ask the user
+the following questions to complete the hotel booking task.
+
+1) Where would you like to stay and when?
+2) How many people are staying in the room?
+3) Do you prefer any amenities like breakfast included or gym?
+4) What is your name, your email address and phone number?
+
+Make sure you receive a logical answer from the user for every question to complete the hotel
+booking process.
+<</SYS>>
+
+Previous conversation:
+{chat_history}
+
+Human: {question}
+AI:"""
+
+#@st.cache_resource
 def load_model():
     quantization_config = BitsAndBytesConfig(
         load_in_8bit=True,
         # bnb_4bit_compute_dtype=torch.bfloat16
     )
     tokenizer = AutoTokenizer.from_pretrained(my_model_id)
-    model = AutoModelForCausalLM.from_pretrained(my_model_id, device_map="auto")
+    model = AutoModelForCausalLM.from_pretrained(my_model_id, device_map="auto", quantization_config=quantization_config)
 
     return tokenizer, model
 
-
+#@st.cache_resource
 def load_pipeline():
-
-
-
+    tokenizer, model = load_model()
+    pipe = pipeline("text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_tokens=50, top_k=30, early_stopping=True,
+            temperature=0.1, repetition_penalty=1.03)
+
+    llm = HuggingFacePipeline(pipeline=pipe)
+    return llm
 
-def generate_from_pipeline(text, pipe):
-
+# def generate_from_pipeline(text, pipe):
+#     return pipe(text)
 
-
-
-
+llm = load_pipeline()
+
+def demo_miny_memory():
+    prompt = PromptTemplate.from_template(template)
+    memory = ConversationBufferMemory(memory_key="chat_history", llm=llm)
     return memory
 
-def demo_chain(input_text
-
-
+def demo_chain(input_text):
+    conversation = ConversationChain(
+        llm=llm,
+        verbose=True,
+        memory=demo_miny_memory()
+    )
 
-    chat_reply =
-    return chat_reply
+    chat_reply = conversation(input=input_text)
+    return chat_reply['response']