kheopss committed on
Commit
5b58187
·
verified ·
1 Parent(s): 2c56e55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -106,7 +106,7 @@ def estimate_tokens(text):
106
  def process_final(user_prom,history):
107
  import time
108
  all_process_start = time.time()
109
- system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents, citing them with <co: doc_id></co> tags. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
110
  new_nodes = get_retrieved_nodes(
111
  user_prom,
112
  vector_top_k=5,
@@ -116,35 +116,34 @@ def process_final(user_prom,history):
116
  get_texts = get_all_text(new_nodes)
117
 
118
  print("PHASE 03 passing to LLM\n")
119
- prompt_f = f"<|im_start|>system \n{system_p}\n DOCUMENTS {get_texts}\n<|im_end|>"
 
120
  total_tokens = estimate_tokens(prompt_f)
121
-
122
- # Ajout de l'historique en commençant par les plus récents
123
  for val in reversed(history):
124
- user_text = f" <|im_start|>user \n {val[0]}\n<|im_end|>" if val[0] else ""
125
- assistant_text = f" <|im_start|>assistant \n {val[1]}\n<|im_end|>" if val[1] else ""
126
-
127
- current_tokens = estimate_tokens(user_text + assistant_text)
128
-
129
-
130
  # Vérifier si l'ajout de cet historique dépasse la limite
131
  if total_tokens + current_tokens > 3000:
132
  break # Arrêter l'ajout si on dépasse la limite
133
  else:
134
  # Ajouter à `prompt_f` et mettre à jour le nombre total de tokens
135
- prompt_f += user_text + assistant_text
136
  total_tokens += current_tokens
137
- print (f"le nombre TOTAL de token : {total_tokens}\n")
138
- prompt_f+=f" <|im_start|>user \n{user_prom} \n<|im_end|><|im_start|>assistant \n"
139
-
140
  phase_03_start = time.time()
141
  gen =llm.stream_complete(formatted=True, prompt=prompt_f)
 
142
  print("_"*100)
143
  print(prompt_f)
144
  print("o"*100)
145
  for response in gen:
146
  yield response.text
147
 
 
148
  description = """
149
  <p>
150
  <center>
 
106
  def process_final(user_prom,history):
107
  import time
108
  all_process_start = time.time()
109
+ system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
110
  new_nodes = get_retrieved_nodes(
111
  user_prom,
112
  vector_top_k=5,
 
116
  get_texts = get_all_text(new_nodes)
117
 
118
  print("PHASE 03 passing to LLM\n")
119
+ sys_p = f"<|im_start|>system \n{system_p}\n DOCUMENTS {get_texts}\n<|im_end|>"
120
+ prompt_f=""
121
  total_tokens = estimate_tokens(prompt_f)
122
+
 
123
  for val in reversed(history):
124
+ if val[0]:
125
+ user_p = f" <|im_start|>user \n {val[0]}\n<|im_end|>"
126
+ if val[1]:
127
+ assistant_p = f" <|im_start|>assistant \n {val[1]}\n<|im_end|>"
128
+ current_tokens = estimate_tokens(user_p+assistant_p)
 
129
  # Vérifier si l'ajout de cet historique dépasse la limite
130
  if total_tokens + current_tokens > 3000:
131
  break # Arrêter l'ajout si on dépasse la limite
132
  else:
133
  # Ajouter à `prompt_f` et mettre à jour le nombre total de tokens
134
+ prompt_f = user_p + assistant_p + prompt_f
135
  total_tokens += current_tokens
136
+ prompt_f=f"{sys_p} {prompt_f} <|im_start|>user \n{user_prom} \n<|im_end|><|im_start|>assistant \n"
 
 
137
  phase_03_start = time.time()
138
  gen =llm.stream_complete(formatted=True, prompt=prompt_f)
139
+ print (f"le nombre TOTAL de tokens : {total_tokens}\n")
140
  print("_"*100)
141
  print(prompt_f)
142
  print("o"*100)
143
  for response in gen:
144
  yield response.text
145
 
146
+
147
  description = """
148
  <p>
149
  <center>