Shreyas094 committed
Commit ee18607 · verified · 1 Parent(s): abc17d4

Update app.py

Files changed (1)
  1. app.py +29 -64
app.py CHANGED
@@ -288,11 +288,7 @@ def chatbot_interface(message, history, use_web_search, model, temperature, num_
     history = history + [(message, "")]
 
     try:
-        if use_web_search:
-            history[-1] = (message, "Generating response... (This may take a moment)")
-            yield history
-
-        for response in respond(message, history, model, temperature, num_calls, use_web_search, selected_docs, instruction_key):
+        for response in respond(message, history, model, temperature, num_calls, use_web_search):
             history[-1] = (message, response)
             yield history
     except gr.CancelledError:
@@ -327,7 +323,10 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
         use_web_search = False  # Ensure we use PDF search for summaries
 
     if use_web_search:
-        for response, _ in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
+        for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
+            response = f"{main_content}\n\n{sources}"
+            first_line = response.split('\n')[0] if response else ''
+            # logging.info(f"Generated Response (first line): {first_line}")
             yield response
     else:
         embed = get_embeddings()
@@ -487,70 +486,36 @@ def get_response_with_search(query, model, num_calls=3, temperature=0.1):
     retriever = web_search_database.as_retriever(search_kwargs={"k": 10})
     relevant_docs = retriever.get_relevant_documents(query)
 
-    context = "\n".join([doc.page_content for doc in relevant_docs[:5]])
+    context = "\n".join([doc.page_content for doc in relevant_docs])
 
-    initial_prompt = f"""Using the following context from web search results:
+    prompt = f"""Using the following context from web search results:
     {context}
     Write a detailed and complete research document that fulfills the following user request: '{query}'
-    Stick closely to the information provided in the context and avoid making unsupported claims."""
+    After writing the document, please provide a list of sources used in your response.
+    Importantly, only include information that is directly supported by the provided context. If you're unsure about any information, state that it couldn't be verified from the given context."""
 
-    try:
+    After writing the document, please provide a list of sources used in your response."""
+
+    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Use Cloudflare API
+        for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
+            yield response, ""  # Yield streaming response without sources
+    else:
+        # Use Hugging Face API
         client = InferenceClient(model, token=huggingface_token)
 
-        # Generate initial response
-        initial_response = client.text_generation(initial_prompt, max_new_tokens=1000, temperature=temperature)
-
-        # Generate critique
-        critique = critique_response(initial_response, context, query, model)
-
-        final_prompt = f"""Given the following initial response, context, critique, and original query, provide a revised response that addresses the identified issues and sticks closely to the information provided in the context while fully answering the user's query in a detailed and complete research document, after writing the document, please provide a list of sources used in your response.
-
-        User Query: {query}
-
-        Initial Response:
-        {initial_response}
-
-        Context:
-        {context}
-
-        Critique:
-        {critique}
-
-        Revised Response:"""
-
-        # Generate final response
-        full_response = ""
-        for chunk in client.text_generation(final_prompt, max_new_tokens=1500, temperature=temperature, stream=True):
-            full_response += chunk
-            yield full_response, ""
-
-        # Add a disclaimer
-        disclaimer = ("\nNote: This response was generated by an AI model based on web search results. "
-                      "While efforts have been made to ensure accuracy, please verify important information from authoritative sources.")
-        full_response += disclaimer
-        yield full_response, ""
-
-    except Exception as e:
-        logging.error(f"Error in multi-step generation process: {str(e)}")
-        yield f"An error occurred during the response generation process: {str(e)}", ""
-
-def critique_response(response, context, query, model):
-    critique_prompt = f"""Given the following response, original context, and user query, identify any statements that might be inaccurate, unsupported by the context, or irrelevant to the query. Be specific about which parts may be hallucinations or extrapolations beyond the given information.
-
-    User Query: {query}
-
-    Response:
-    {response}
-
-    Original Context:
-    {context}
-
-    Critique:"""
-
-    client = InferenceClient(model, token=huggingface_token)
-    critique = client.text_generation(critique_prompt, max_new_tokens=500, temperature=0.2)
-
-    return critique
+        main_content = ""
+        for i in range(num_calls):
+            for message in client.chat_completion(
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=10000,
+                temperature=temperature,
+                stream=True,
+            ):
+                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                    chunk = message.choices[0].delta.content
+                    main_content += chunk
+                    yield main_content, ""  # Yield partial main content without sources
 
 
 INSTRUCTION_PROMPTS = {
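
For reference, the new Hugging Face branch above is a standard streaming loop over `InferenceClient.chat_completion`. A minimal standalone sketch of that pattern follows; the model id, token, and prompt here are placeholders for illustration, not values taken from this Space:

from huggingface_hub import InferenceClient

# Placeholder model and token -- substitute the model and HF token your app actually uses.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token="hf_xxx")

prompt = "Using the context below, write a short research summary of topic X."
main_content = ""

# With stream=True, chat_completion yields chunks whose delta carries the newly generated text.
for message in client.chat_completion(
    messages=[{"role": "user", "content": prompt}],
    max_tokens=512,
    temperature=0.1,
    stream=True,
):
    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
        chunk = message.choices[0].delta.content
        main_content += chunk
        print(chunk, end="", flush=True)  # Stream partial output as it arrives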