Gopikanth123 committed · verified
Commit 534040e · Parent(s): 655f603

Update main.py

Files changed (1)
  1. main.py +44 -70
main.py CHANGED
@@ -1,12 +1,13 @@
-from flask import Flask, render_template, request, jsonify
-import os
-import shutil
-from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
-from llama_index.llms.huggingface import HuggingFaceInferenceAPI
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from huggingface_hub import InferenceClient
+import os
+import shutil
+from flask import Flask, render_template, request, jsonify
+from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
+from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModel
 
+
 # Ensure HF_TOKEN is set
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
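The context here cuts off inside the token guard, so the body of the if not HF_TOKEN: branch is not visible in this diff. A minimal sketch of the usual fail-fast pattern (the raised error and its message are assumptions, not the commit's code):

import os

# Assumed guard body: the hunk ends at `if not HF_TOKEN:`, so raising a
# descriptive error here is a sketch of the typical pattern, not main.py's
# actual code.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")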
@@ -27,13 +28,15 @@ Settings.llm = HuggingFaceInferenceAPI(
     max_new_tokens=512,
     generate_kwargs={"temperature": 0.1},
 )
-
-# Configure embedding model (XLM-RoBERTa model for multilingual support)
+# Settings.embed_model = HuggingFaceEmbedding(
+#     model_name="BAAI/bge-small-en-v1.5"
+# )
+# Replace the embedding model with XLM-R
 Settings.embed_model = HuggingFaceEmbedding(
-    model_name="xlm-roberta-base"  # Multilingual support
+    model_name="xlm-roberta-base"  # XLM-RoBERTa model for multilingual support
 )
 
-# Configure tokenizer and model for multilingual responses
+# Configure tokenizer and model if required
 tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
 model = AutoModel.from_pretrained("xlm-roberta-base")
 
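This hunk is the multilingual core of the commit: the English-only BAAI/bge-small-en-v1.5 embedder stays behind as a comment and xlm-roberta-base takes its place. A quick sanity check of the swap, as a sketch (the sample sentences are invented; get_text_embedding is llama_index's standard embedding call):

from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Same model the commit sets in Settings.embed_model
embed_model = HuggingFaceEmbedding(model_name="xlm-roberta-base")

# Embed the same question in English and Hindi (invented sample text)
vec_en = embed_model.get_text_embedding("What time is check-in?")
vec_hi = embed_model.get_text_embedding("चेक-इन किस समय होता है?")

# xlm-roberta-base has hidden size 768, so both pooled vectors are 768-d
assert len(vec_en) == len(vec_hi) == 768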
@@ -46,80 +49,56 @@ os.makedirs(PERSIST_DIR, exist_ok=True)
 chat_history = []
 current_chat_history = []
 
-# Data ingestion function
 def data_ingestion_from_directory():
+    # Clear previous data by removing the persist directory
     if os.path.exists(PERSIST_DIR):
-        shutil.rmtree(PERSIST_DIR)  # Remove the persist directory and its contents
+        shutil.rmtree(PERSIST_DIR)  # Remove the persist directory and all its contents
 
+    # Recreate the persist directory after removal
     os.makedirs(PERSIST_DIR, exist_ok=True)
+
+    # Load new documents from the directory
     new_documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
+
+    # Create a new index with the new documents
     index = VectorStoreIndex.from_documents(new_documents)
+
+    # Persist the new index
     index.storage_context.persist(persist_dir=PERSIST_DIR)
 
-def handle_query(query, user_language):
+def handle_query(query):
     context_str = ""
 
     # Build context from current chat history
     for past_query, response in reversed(current_chat_history):
         if past_query.strip():
             context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
 
-    # Define the chat response template based on selected language
-    if user_language == 'te':  # Telugu
-        response_template = """
-        You are the Taj Hotel chatbot, serving as the Taj Hotel assistant.
-        **Your role:**
-        - Answer in the user's preferred language (English, Telugu, or Hindi).
-        - Provide information about the hotel's services, amenities, and policies.
-        **Instructions:**
-        - **Context:**
-        {context_str}
-        - **User's question:**
-        {query_str}
-        **Answer:** [Your answer in Telugu here]
-        """
-    elif user_language == 'hi':  # Hindi
-        response_template = """
-        You are the Taj Hotel chatbot, Taj Hotel Helper.
-        **Your role:**
-        - Answer in the language chosen by the user (English, Hindi, or Telugu).
-        - Provide information about the hotel's services, amenities, and policies.
-        **Instructions:**
-        - **Context:**
-        {context_str}
-        - **User's question:**
-        {query_str}
-        **Answer:** [Your answer in Hindi here]
-        """
-    else:  # Default to English
-        response_template = """
-        You are the Taj Hotel chatbot, Taj Hotel Helper.
-        **Your Role:**
-        - Respond accurately and concisely in the user's preferred language (English, Telugu, or Hindi).
-        - Provide information about the hotel's services, amenities, and policies.
-        **Instructions:**
-        - **Context:**
-        {context_str}
-        - **User's Question:**
-        {query_str}
-        **Response:** [Your concise response here]
-        """
-
-    # Create a list of chat messages with the user query and response template
     chat_text_qa_msgs = [
         (
             "user",
-            response_template.format(context_str=context_str, query_str=query)
+            """
+            You are the Hotel voice chatbot and your name is hotel helper. Your goal is to provide accurate, professional, and helpful answers to user queries based on the hotel's data. Always ensure your responses are clear and concise. Give response within 10-15 words only. You need to give an answer in the same language used by the user.
+            {context_str}
+            Question:
+            {query_str}
+            """
         )
     ]
 
-    # Use the defined chat template
-    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+
+    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
 
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
     index = load_index_from_storage(storage_context)
+    # context_str = ""
+
+    # # Build context from current chat history
+    # for past_query, response in reversed(current_chat_history):
+    #     if past_query.strip():
+    #         context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
 
-    # Query the index and retrieve the answer
     query_engine = index.as_query_engine(text_qa_template=text_qa_template, context_str=context_str)
     print(f"Querying: {query}")
     answer = query_engine.query(query)
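With the language-specific templates removed, handle_query funnels every request through the single multilingual prompt and the persisted index. A minimal sketch of exercising that retrieval path by hand, assuming ingestion has already run and the Settings block above is in effect; PERSIST_DIR is defined earlier in main.py and not shown in this diff, so the path below is a placeholder:

from llama_index.core import StorageContext, load_index_from_storage

PERSIST_DIR = "./db"  # placeholder; the real value is set above the first hunk

# Load the index that data_ingestion_from_directory() persisted
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)

# No language argument anywhere: the prompt itself tells the model to answer
# in whatever language the user wrote in.
query_engine = index.as_query_engine()
print(query_engine.query("Is breakfast included?"))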
@@ -136,17 +115,16 @@ def handle_query(query, user_language):
     current_chat_history.append((query, response))
     return response
 
-
 app = Flask(__name__)
 
 # Data ingestion
 data_ingestion_from_directory()
 
 # Generate Response
-def generate_response(query, language):
+def generate_response(query):
     try:
         # Call the handle_query function to get the response
-        bot_response = handle_query(query, language)
+        bot_response = handle_query(query)
         return bot_response
     except Exception as e:
         return f"Error fetching the response: {str(e)}"
@@ -161,17 +139,13 @@ def index():
 def chat():
     try:
         user_message = request.json.get("message")
-        selected_language = request.json.get("language")  # Get selected language from the request
         if not user_message:
             return jsonify({"response": "Please say something!"})
 
-        if selected_language not in ['english', 'telugu', 'hindi']:
-            return jsonify({"response": "Invalid language selected."})
-
-        bot_response = generate_response(user_message, selected_language)
+        bot_response = generate_response(user_message)
         return jsonify({"response": bot_response})
     except Exception as e:
         return jsonify({"response": f"An error occurred: {str(e)}"})
 
 if __name__ == '__main__':
-    app.run(debug=True)
+    app.run(debug=True)
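The chat route now reads only a "message" field; the old "language" field and its validation are gone, so clients can drop it entirely. A client-side sketch, assuming the route is served at /chat on Flask's default dev address (the decorator sits outside the hunk, so the path is inferred from the function name):

import requests

resp = requests.post(
    "http://127.0.0.1:5000/chat",                # path assumed from def chat()
    json={"message": "Is breakfast included?"},  # no "language" key needed anymore
)
print(resp.json()["response"])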
 