Spaces:

kheopss
/

METROPOLE_CHATBOT_FINAL

Sleeping

App Files Community

kheopss committed on Aug 25, 2024

Commit

5b58187

verified ·

1 Parent(s): 2c56e55

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -14

app.py CHANGED Viewed

@@ -106,7 +106,7 @@ def estimate_tokens(text):
 def process_final(user_prom,history):
     import time
     all_process_start = time.time()
-    system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents, citing them with <co: doc_id></co> tags. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
     new_nodes = get_retrieved_nodes(
       user_prom,
       vector_top_k=5,
@@ -116,35 +116,34 @@ def process_final(user_prom,history):
     get_texts = get_all_text(new_nodes)
     print("PHASE 03 passing to LLM\n")
-    prompt_f = f"<|im_start|>system \n{system_p}\n DOCUMENTS {get_texts}\n<|im_end|>"
     total_tokens = estimate_tokens(prompt_f)
-    # Ajout de l'historique en commençant par les plus récents
     for val in reversed(history):
-        user_text = f" <|im_start|>user \n {val[0]}\n<|im_end|>" if val[0] else ""
-        assistant_text = f" <|im_start|>assistant \n {val[1]}\n<|im_end|>" if val[1] else ""
-        current_tokens = estimate_tokens(user_text + assistant_text)
         # Vérifier si l'ajout de cet historique dépasse la limite
         if total_tokens + current_tokens > 3000:
             break  # Arrêter l'ajout si on dépasse la limite
         else:
             # Ajouter à `prompt_f` et mettre à jour le nombre total de tokens
-            prompt_f += user_text + assistant_text
             total_tokens += current_tokens
-    print (f"le nombre TOTAL de token : {total_tokens}\n")
-    prompt_f+=f" <|im_start|>user \n{user_prom} \n<|im_end|><|im_start|>assistant \n"
     phase_03_start = time.time()
     gen =llm.stream_complete(formatted=True, prompt=prompt_f)
     print("_"*100)
     print(prompt_f)
     print("o"*100)
     for response in gen:
         yield response.text
 description = """
 <p>
 <center>

 def process_final(user_prom,history):
     import time
     all_process_start = time.time()
+    system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
     new_nodes = get_retrieved_nodes(
       user_prom,
       vector_top_k=5,
     get_texts = get_all_text(new_nodes)
     print("PHASE 03 passing to LLM\n")
+    sys_p = f"<|im_start|>system \n{system_p}\n DOCUMENTS {get_texts}\n<|im_end|>"
+    prompt_f=""
     total_tokens = estimate_tokens(prompt_f)
     for val in reversed(history):
+        if val[0]:
+            user_p = f" <|im_start|>user \n {val[0]}\n<|im_end|>"
+        if val[1]:
+            assistant_p = f" <|im_start|>assistant \n {val[1]}\n<|im_end|>"
+        current_tokens = estimate_tokens(user_p+assistant_p)
         # Vérifier si l'ajout de cet historique dépasse la limite
         if total_tokens + current_tokens > 3000:
             break  # Arrêter l'ajout si on dépasse la limite
         else:
             # Ajouter à `prompt_f` et mettre à jour le nombre total de tokens
+            prompt_f = user_p + assistant_p + prompt_f
             total_tokens += current_tokens
+    prompt_f=f"{sys_p} {prompt_f} <|im_start|>user \n{user_prom} \n<|im_end|><|im_start|>assistant \n"
     phase_03_start = time.time()
     gen =llm.stream_complete(formatted=True, prompt=prompt_f)
+    print (f"le nombre TOTAL de tokens : {total_tokens}\n")
     print("_"*100)
     print(prompt_f)
     print("o"*100)
     for response in gen:
         yield response.text
 description = """
 <p>
 <center>