Shreyas094 committed on
Commit
33d3451
·
verified ·
1 Parent(s): da860a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -15
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
2
  import requests
3
  import time
4
  import random
 
5
  from bs4 import BeautifulSoup
6
  import trafilatura
 
7
 
8
  USER_AGENTS = [
9
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
@@ -95,14 +97,42 @@ def search_searx(query, instance_url='https://searx.org', categories='general',
95
 
96
  return "Max retries reached. Please try again later."
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def create_gradio_interface():
99
- """
100
- Creates and returns the Gradio interface with advanced SearXNG options and new parameters.
101
- """
102
  with gr.Blocks() as demo:
103
- gr.Markdown("# 🕵️‍♂️ Advanced SearXNG Search with Content Extraction")
104
  gr.Markdown(
105
- "This application allows you to perform private searches using SearXNG with advanced options and content extraction."
106
  )
107
  with gr.Row():
108
  with gr.Column():
@@ -149,8 +179,6 @@ def create_gradio_interface():
149
  step=1,
150
  label="SafeSearch (0: Off, 1: Moderate, 2: Strict)"
151
  )
152
-
153
- # New parameters
154
  search_engines = gr.Dropdown(
155
  choices=["all", "google", "bing", "duckduckgo", "wikipedia"],
156
  value="all",
@@ -169,18 +197,19 @@ def create_gradio_interface():
169
  step=100,
170
  label="Max Characters to Extract"
171
  )
172
-
173
- search_button = gr.Button("Search")
174
  with gr.Column():
175
- results = gr.Markdown("### Search Results will appear here...")
176
 
177
- def perform_search(q, url, cats, num, use_traf, t_range, lang, safe, engines, sort, chars):
178
- return search_searx(q, instance_url=url, categories=cats, num_results=int(num),
179
- use_trafilatura=use_traf, time_range=t_range, language=lang, safesearch=int(safe),
180
- search_engines=engines, sort_by=sort, max_chars=chars)
 
 
181
 
182
  search_button.click(
183
- perform_search,
184
  inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
185
  search_engines, sort_by, max_chars],
186
  outputs=results
@@ -191,6 +220,7 @@ def create_gradio_interface():
191
  ---
192
  **Note:** This application uses SearXNG to fetch results from multiple sources while preserving your privacy.
193
  It then attempts to extract content from the original sources, which may be subject to the terms of service of those websites.
 
194
  """
195
  )
196
 
 
2
  import requests
3
  import time
4
  import random
5
+ import os
6
  from bs4 import BeautifulSoup
7
  import trafilatura
8
+ from huggingface_hub import InferenceClient
9
 
10
  USER_AGENTS = [
11
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
 
97
 
98
  return "Max retries reached. Please try again later."
99
 
100
+
101
def summarize_with_llm(query, search_results):
    """Summarize search results for a query using a hosted Mistral Nemo model.

    Args:
        query: The user's search query.
        search_results: The search results text; it is interpolated verbatim
            into the user prompt.

    Returns:
        The concatenated text streamed back by the model. This is an empty
        string when the stream carries no text.
    """
    # The API token is read from the HUGGINGFACE_API_KEY environment variable.
    client = InferenceClient(
        "mistralai/Mistral-Nemo-Instruct-2407",
        token=os.getenv("HUGGINGFACE_API_KEY"),
    )

    system_prompt = """You are an AI assistant tasked with summarizing search results. Your goal is to provide a concise, informative summary of the search results in relation to the user's query. Focus on the most relevant information and present it in a clear, organized manner."""

    user_prompt = f"""Query: {query}

Search Results:
{search_results}

Please provide a summary of the search results in relation to the query. Highlight the most relevant information, identify any common themes or contradictions, and present the information in a clear and concise manner. If there are any gaps in the information or areas that require further research, please mention them as well."""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    pieces = []
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        # A streamed chunk may carry no choices, and a delta's content is
        # optional (it can be None), so guard both before accumulating;
        # concatenating None onto a str would raise TypeError mid-stream.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)

    return "".join(pieces)
130
+
131
  def create_gradio_interface():
 
 
 
132
  with gr.Blocks() as demo:
133
+ gr.Markdown("# 🕵️‍♂️ Advanced SearXNG Search with LLM Summary")
134
  gr.Markdown(
135
+ "This application allows you to perform private searches using SearXNG with advanced options and get an AI-generated summary of the results."
136
  )
137
  with gr.Row():
138
  with gr.Column():
 
179
  step=1,
180
  label="SafeSearch (0: Off, 1: Moderate, 2: Strict)"
181
  )
 
 
182
  search_engines = gr.Dropdown(
183
  choices=["all", "google", "bing", "duckduckgo", "wikipedia"],
184
  value="all",
 
197
  step=100,
198
  label="Max Characters to Extract"
199
  )
200
+ search_button = gr.Button("Search and Summarize")
 
201
  with gr.Column():
202
+ results = gr.Markdown("### Search Results and Summary will appear here...")
203
 
204
def perform_search_and_summarize(q, url, cats, num, use_traf, t_range, lang, safe, engines, sort, chars):
    """Run the search, summarize it with the LLM, and return both as Markdown.

    The summary section comes first, followed by the unmodified search
    results text.
    """
    # Collect the search options up front; num and safe are coerced to int
    # before being passed on.
    search_options = {
        "instance_url": url,
        "categories": cats,
        "num_results": int(num),
        "use_trafilatura": use_traf,
        "time_range": t_range,
        "language": lang,
        "safesearch": int(safe),
        "search_engines": engines,
        "sort_by": sort,
        "max_chars": chars,
    }
    search_results = search_searx(q, **search_options)
    summary = summarize_with_llm(q, search_results)
    return f"## AI-Generated Summary\n\n{summary}\n\n## Original Search Results\n\n{search_results}"
210
 
211
  search_button.click(
212
+ perform_search_and_summarize,
213
  inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
214
  search_engines, sort_by, max_chars],
215
  outputs=results
 
220
  ---
221
  **Note:** This application uses SearXNG to fetch results from multiple sources while preserving your privacy.
222
  It then attempts to extract content from the original sources, which may be subject to the terms of service of those websites.
223
+ The AI-generated summary is provided by a Mistral Nemo LLM and should be reviewed for accuracy.
224
  """
225
  )
226