Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
|
|
2 |
import requests
|
3 |
import time
|
4 |
import random
|
|
|
5 |
from bs4 import BeautifulSoup
|
6 |
import trafilatura
|
|
|
7 |
|
8 |
USER_AGENTS = [
|
9 |
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
@@ -95,14 +97,42 @@ def search_searx(query, instance_url='https://searx.org', categories='general',
|
|
95 |
|
96 |
return "Max retries reached. Please try again later."
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
def create_gradio_interface():
|
99 |
-
"""
|
100 |
-
Creates and returns the Gradio interface with advanced SearXNG options and new parameters.
|
101 |
-
"""
|
102 |
with gr.Blocks() as demo:
|
103 |
-
gr.Markdown("# 🕵️♂️ Advanced SearXNG Search with
|
104 |
gr.Markdown(
|
105 |
-
"This application allows you to perform private searches using SearXNG with advanced options and
|
106 |
)
|
107 |
with gr.Row():
|
108 |
with gr.Column():
|
@@ -149,8 +179,6 @@ def create_gradio_interface():
|
|
149 |
step=1,
|
150 |
label="SafeSearch (0: Off, 1: Moderate, 2: Strict)"
|
151 |
)
|
152 |
-
|
153 |
-
# New parameters
|
154 |
search_engines = gr.Dropdown(
|
155 |
choices=["all", "google", "bing", "duckduckgo", "wikipedia"],
|
156 |
value="all",
|
@@ -169,18 +197,19 @@ def create_gradio_interface():
|
|
169 |
step=100,
|
170 |
label="Max Characters to Extract"
|
171 |
)
|
172 |
-
|
173 |
-
search_button = gr.Button("Search")
|
174 |
with gr.Column():
|
175 |
-
results = gr.Markdown("### Search Results will appear here...")
|
176 |
|
177 |
-
def
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
181 |
|
182 |
search_button.click(
|
183 |
-
|
184 |
inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
|
185 |
search_engines, sort_by, max_chars],
|
186 |
outputs=results
|
@@ -191,6 +220,7 @@ def create_gradio_interface():
|
|
191 |
---
|
192 |
**Note:** This application uses SearXNG to fetch results from multiple sources while preserving your privacy.
|
193 |
It then attempts to extract content from the original sources, which may be subject to the terms of service of those websites.
|
|
|
194 |
"""
|
195 |
)
|
196 |
|
|
|
2 |
import requests
|
3 |
import time
|
4 |
import random
|
5 |
+
import os
|
6 |
from bs4 import BeautifulSoup
|
7 |
import trafilatura
|
8 |
+
from huggingface_hub import InferenceClient
|
9 |
|
10 |
USER_AGENTS = [
|
11 |
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
|
|
97 |
|
98 |
return "Max retries reached. Please try again later."
|
99 |
|
100 |
+
|
101 |
+
def summarize_with_llm(query, search_results):
    """Summarize search results with a hosted Mistral Nemo model.

    Args:
        query: The user's original search query string.
        search_results: The formatted search-result text to summarize.

    Returns:
        The summary text accumulated from the model's streamed response.
    """
    # Token is read from the environment; InferenceClient raises on auth
    # failure at request time, not here.
    client = InferenceClient(
        "mistralai/Mistral-Nemo-Instruct-2407",
        token=os.getenv("HUGGINGFACE_API_KEY"),
    )

    system_prompt = """You are an AI assistant tasked with summarizing search results. Your goal is to provide a concise, informative summary of the search results in relation to the user's query. Focus on the most relevant information and present it in a clear, organized manner."""

    user_prompt = f"""Query: {query}

Search Results:
{search_results}

Please provide a summary of the search results in relation to the query. Highlight the most relevant information, identify any common themes or contradictions, and present the information in a clear and concise manner. If there are any gaps in the information or areas that require further research, please mention them as well."""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    summary = ""
    for message in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        # Streamed chunks can carry delta.content = None (e.g. the final
        # chunk with only a finish_reason) — guard so `str + None` never
        # raises TypeError mid-stream.
        chunk = message.choices[0].delta.content
        if chunk:
            summary += chunk

    return summary
|
130 |
+
|
131 |
def create_gradio_interface():
|
|
|
|
|
|
|
132 |
with gr.Blocks() as demo:
|
133 |
+
gr.Markdown("# 🕵️♂️ Advanced SearXNG Search with LLM Summary")
|
134 |
gr.Markdown(
|
135 |
+
"This application allows you to perform private searches using SearXNG with advanced options and get an AI-generated summary of the results."
|
136 |
)
|
137 |
with gr.Row():
|
138 |
with gr.Column():
|
|
|
179 |
step=1,
|
180 |
label="SafeSearch (0: Off, 1: Moderate, 2: Strict)"
|
181 |
)
|
|
|
|
|
182 |
search_engines = gr.Dropdown(
|
183 |
choices=["all", "google", "bing", "duckduckgo", "wikipedia"],
|
184 |
value="all",
|
|
|
197 |
step=100,
|
198 |
label="Max Characters to Extract"
|
199 |
)
|
200 |
+
search_button = gr.Button("Search and Summarize")
|
|
|
201 |
with gr.Column():
|
202 |
+
results = gr.Markdown("### Search Results and Summary will appear here...")
|
203 |
|
204 |
+
def perform_search_and_summarize(q, url, cats, num, use_traf, t_range, lang, safe, engines, sort, chars):
    """Run a SearXNG search, summarize the hits with the LLM, and render both as Markdown."""
    raw_results = search_searx(
        q,
        instance_url=url,
        categories=cats,
        num_results=int(num),
        use_trafilatura=use_traf,
        time_range=t_range,
        language=lang,
        safesearch=int(safe),
        search_engines=engines,
        sort_by=sort,
        max_chars=chars,
    )
    llm_summary = summarize_with_llm(q, raw_results)
    return (
        "## AI-Generated Summary\n\n"
        f"{llm_summary}\n\n"
        "## Original Search Results\n\n"
        f"{raw_results}"
    )
|
210 |
|
211 |
search_button.click(
|
212 |
+
perform_search_and_summarize,
|
213 |
inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
|
214 |
search_engines, sort_by, max_chars],
|
215 |
outputs=results
|
|
|
220 |
---
|
221 |
**Note:** This application uses SearXNG to fetch results from multiple sources while preserving your privacy.
|
222 |
It then attempts to extract content from the original sources, which may be subject to the terms of service of those websites.
|
223 |
+
The AI-generated summary is provided by a Mistral Nemo LLM and should be reviewed for accuracy.
|
224 |
"""
|
225 |
)
|
226 |
|