Spaces:

Shreyas094
/

SearXNG-Engine

Sleeping

App Files Files Community

Shreyas094 committed on Sep 27, 2024

Commit

33d3451

verified ·

1 Parent(s): da860a3

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -15

app.py CHANGED Viewed

@@ -2,8 +2,10 @@ import gradio as gr
 import requests
 import time
 import random
 from bs4 import BeautifulSoup
 import trafilatura
 USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
@@ -95,14 +97,42 @@ def search_searx(query, instance_url='https://searx.org', categories='general',
     return "Max retries reached. Please try again later."
 def create_gradio_interface():
-    """
-    Creates and returns the Gradio interface with advanced SearXNG options and new parameters.
-    """
     with gr.Blocks() as demo:
-        gr.Markdown("# 🕵️‍♂️ Advanced SearXNG Search with Content Extraction")
         gr.Markdown(
-            "This application allows you to perform private searches using SearXNG with advanced options and content extraction."
         )
         with gr.Row():
             with gr.Column():
@@ -149,8 +179,6 @@ def create_gradio_interface():
                     step=1,
                     label="SafeSearch (0: Off, 1: Moderate, 2: Strict)"
                 )
-                # New parameters
                 search_engines = gr.Dropdown(
                     choices=["all", "google", "bing", "duckduckgo", "wikipedia"],
                     value="all",
@@ -169,18 +197,19 @@ def create_gradio_interface():
                     step=100,
                     label="Max Characters to Extract"
                 )
-                search_button = gr.Button("Search")
             with gr.Column():
-                results = gr.Markdown("### Search Results will appear here...")
-        def perform_search(q, url, cats, num, use_traf, t_range, lang, safe, engines, sort, chars):
-            return search_searx(q, instance_url=url, categories=cats, num_results=int(num),
-                                use_trafilatura=use_traf, time_range=t_range, language=lang, safesearch=int(safe),
-                                search_engines=engines, sort_by=sort, max_chars=chars)
         search_button.click(
-            perform_search,
             inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
                     search_engines, sort_by, max_chars],
             outputs=results
@@ -191,6 +220,7 @@ def create_gradio_interface():
             ---
             **Note:** This application uses SearXNG to fetch results from multiple sources while preserving your privacy.
             It then attempts to extract content from the original sources, which may be subject to the terms of service of those websites.
             """
         )

 import requests
 import time
 import random
+import os
 from bs4 import BeautifulSoup
 import trafilatura
+from huggingface_hub import InferenceClient
 USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
     return "Max retries reached. Please try again later."
+def summarize_with_llm(query, search_results):
+    client = InferenceClient(
+        "mistralai/Mistral-Nemo-Instruct-2407",
+        token=os.getenv("HUGGINGFACE_API_KEY"),
+    )
+    system_prompt = """You are an AI assistant tasked with summarizing search results. Your goal is to provide a concise, informative summary of the search results in relation to the user's query. Focus on the most relevant information and present it in a clear, organized manner."""
+    user_prompt = f"""Query: {query}
+Search Results:
+{search_results}
+Please provide a summary of the search results in relation to the query. Highlight the most relevant information, identify any common themes or contradictions, and present the information in a clear and concise manner. If there are any gaps in the information or areas that require further research, please mention them as well."""
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt}
+    ]
+    summary = ""
+    for message in client.chat_completion(
+        messages=messages,
+        max_tokens=500,
+        stream=True,
+    ):
+        summary += message.choices[0].delta.content
+    return summary
 def create_gradio_interface():
     with gr.Blocks() as demo:
+        gr.Markdown("# 🕵️‍♂️ Advanced SearXNG Search with LLM Summary")
         gr.Markdown(
+            "This application allows you to perform private searches using SearXNG with advanced options and get an AI-generated summary of the results."
         )
         with gr.Row():
             with gr.Column():
                     step=1,
                     label="SafeSearch (0: Off, 1: Moderate, 2: Strict)"
                 )
                 search_engines = gr.Dropdown(
                     choices=["all", "google", "bing", "duckduckgo", "wikipedia"],
                     value="all",
                     step=100,
                     label="Max Characters to Extract"
                 )
+                search_button = gr.Button("Search and Summarize")
             with gr.Column():
+                results = gr.Markdown("### Search Results and Summary will appear here...")
+        def perform_search_and_summarize(q, url, cats, num, use_traf, t_range, lang, safe, engines, sort, chars):
+            search_results = search_searx(q, instance_url=url, categories=cats, num_results=int(num),
+                                          use_trafilatura=use_traf, time_range=t_range, language=lang, safesearch=int(safe),
+                                          search_engines=engines, sort_by=sort, max_chars=chars)
+            summary = summarize_with_llm(q, search_results)
+            return f"## AI-Generated Summary\n\n{summary}\n\n## Original Search Results\n\n{search_results}"
         search_button.click(
+            perform_search_and_summarize,
             inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
                     search_engines, sort_by, max_chars],
             outputs=results
             ---
             **Note:** This application uses SearXNG to fetch results from multiple sources while preserving your privacy.
             It then attempts to extract content from the original sources, which may be subject to the terms of service of those websites.
+            The AI-generated summary is provided by a Mistral Nemo LLM and should be reviewed for accuracy.
             """
         )