Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,15 @@ import requests
|
|
3 |
import time
|
4 |
import random
|
5 |
import os
|
|
|
6 |
from bs4 import BeautifulSoup
|
7 |
import trafilatura
|
8 |
from huggingface_hub import InferenceClient
|
9 |
|
|
|
|
|
|
|
|
|
10 |
USER_AGENTS = [
|
11 |
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
12 |
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
@@ -99,34 +104,47 @@ def search_searx(query, instance_url='https://searx.org', categories='general',
|
|
99 |
|
100 |
|
101 |
def summarize_with_llm(query, search_results):
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
Search Results:
|
112 |
{search_results}
|
113 |
|
114 |
Please provide a summary of the search results in relation to the query. Highlight the most relevant information, identify any common themes or contradictions, and present the information in a clear and concise manner. If there are any gaps in the information or areas that require further research, please mention them as well."""
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
128 |
|
129 |
-
|
|
|
|
|
|
|
|
|
130 |
|
131 |
def create_gradio_interface():
|
132 |
with gr.Blocks() as demo:
|
@@ -202,18 +220,27 @@ def create_gradio_interface():
|
|
202 |
results = gr.Markdown("### Search Results and Summary will appear here...")
|
203 |
|
204 |
def perform_search_and_summarize(q, url, cats, num, use_traf, t_range, lang, safe, engines, sort, chars):
    """Run the search for ``q`` and return summary plus raw results as Markdown.

    The UI values are forwarded to ``search_searx`` (``num`` and ``safe`` are
    converted to ``int`` first), the search output is handed to
    ``summarize_with_llm``, and both pieces are combined into one Markdown
    string with an "AI-Generated Summary" section followed by an
    "Original Search Results" section.
    """
    # Collect the keyword arguments in the same order the call evaluates them.
    search_options = {
        "instance_url": url,
        "categories": cats,
        "num_results": int(num),
        "use_trafilatura": use_traf,
        "time_range": t_range,
        "language": lang,
        "safesearch": int(safe),
        "search_engines": engines,
        "sort_by": sort,
        "max_chars": chars,
    }
    found = search_searx(q, **search_options)
    digest = summarize_with_llm(q, found)
    return f"## AI-Generated Summary\n\n{digest}\n\n## Original Search Results\n\n{found}"
|
210 |
-
|
|
|
|
|
|
|
211 |
search_button.click(
|
212 |
perform_search_and_summarize,
|
213 |
inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
|
214 |
search_engines, sort_by, max_chars],
|
215 |
outputs=results
|
216 |
)
|
|
|
217 |
|
218 |
gr.Markdown(
|
219 |
"""
|
@@ -229,4 +256,5 @@ def create_gradio_interface():
|
|
229 |
iface = create_gradio_interface()
|
230 |
|
231 |
if __name__ == "__main__":
|
|
|
232 |
iface.launch()
|
|
|
3 |
import time
|
4 |
import random
|
5 |
import os
|
6 |
+
import logging
|
7 |
from bs4 import BeautifulSoup
|
8 |
import trafilatura
|
9 |
from huggingface_hub import InferenceClient
|
10 |
|
11 |
+
# Set up logging
|
12 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
13 |
+
logger = logging.getLogger(__name__)
|
14 |
+
|
15 |
USER_AGENTS = [
|
16 |
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
17 |
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
|
|
104 |
|
105 |
|
106 |
def _build_summary_messages(query, search_results):
    """Return the system/user chat messages asking for a summary of the results."""
    system_prompt = """You are an AI assistant tasked with summarizing search results. Your goal is to provide a concise, informative summary of the search results in relation to the user's query. Focus on the most relevant information and present it in a clear, organized manner."""

    user_prompt = (
        f"Query: {query}\n"
        "\n"
        "Search Results:\n"
        f"{search_results}\n"
        "\n"
        "Please provide a summary of the search results in relation to the query. Highlight the most relevant information, identify any common themes or contradictions, and present the information in a clear and concise manner. If there are any gaps in the information or areas that require further research, please mention them as well."
    )

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]


def _collect_stream_text(stream):
    """Concatenate the text deltas of a streamed chat completion."""
    pieces = []
    for chunk in stream:
        # Some chunks carry no choices at all; skip them instead of indexing
        # into an empty list.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        # Role-only and terminating chunks have ``content`` set to None;
        # appending it would raise TypeError and discard the whole summary.
        if text:
            pieces.append(text)
    return "".join(pieces)


def summarize_with_llm(query, search_results):
    """Summarize ``search_results`` for ``query`` with a hosted chat model.

    Reads the API token from the ``HUGGINGFACE_API_KEY`` environment variable,
    sends a system + user prompt to ``mistralai/Mistral-Nemo-Instruct-2407``
    through ``InferenceClient.chat_completion`` (streamed, at most 500 tokens)
    and joins the streamed text into one string.

    Args:
        query: The user's search query; interpolated into the prompt.
        search_results: The search output to summarize; interpolated into the
            prompt as-is.

    Returns:
        The generated summary text. On failure no exception is raised;
        instead a human-readable string starting with ``"Error"`` is returned
        (missing API key, or any exception raised while calling the model).
    """
    logger.debug("Attempting to summarize results for query: %s", query)
    try:
        api_key = os.getenv("HUGGINGFACE_API_KEY")
        if not api_key:
            logger.error("HUGGINGFACE_API_KEY environment variable is not set")
            return "Error: Hugging Face API key is not set. Please set the HUGGINGFACE_API_KEY environment variable."

        logger.debug("Initializing InferenceClient")
        client = InferenceClient(
            "mistralai/Mistral-Nemo-Instruct-2407",
            token=api_key,
        )

        messages = _build_summary_messages(query, search_results)

        logger.debug("Sending request to Hugging Face API")
        stream = client.chat_completion(
            messages=messages,
            max_tokens=500,
            stream=True,
        )
        summary = _collect_stream_text(stream)

        logger.debug("Successfully generated summary")
        return summary
    except Exception as e:
        # Deliberately broad: this feeds a UI callback, so any failure is
        # logged with its traceback and reported to the user as text.
        logger.exception("Error in summarize_with_llm: %s", e)
        return f"Error generating summary: {str(e)}"
|
148 |
|
149 |
def create_gradio_interface():
|
150 |
with gr.Blocks() as demo:
|
|
|
220 |
results = gr.Markdown("### Search Results and Summary will appear here...")
|
221 |
|
222 |
def perform_search_and_summarize(q, url, cats, num, use_traf, t_range, lang, safe, engines, sort, chars):
    """Run the search for ``q``, summarize it, and return Markdown for display.

    The UI values are forwarded to ``search_searx`` (``num`` and ``safe`` are
    converted to ``int`` first) and the search output is passed to
    ``summarize_with_llm``. The return value is one Markdown string with an
    "AI-Generated Summary" section followed by an "Original Search Results"
    section. Any exception raised along the way is logged with its traceback
    and turned into an "An error occurred: ..." string instead of propagating.
    """
    logger.debug(f"Performing search for query: {q}")
    try:
        # Built inside the try so a failing int() conversion is reported
        # like any other error.
        search_options = {
            "instance_url": url,
            "categories": cats,
            "num_results": int(num),
            "use_trafilatura": use_traf,
            "time_range": t_range,
            "language": lang,
            "safesearch": int(safe),
            "search_engines": engines,
            "sort_by": sort,
            "max_chars": chars,
        }
        found = search_searx(q, **search_options)

        logger.debug("Search completed, attempting to summarize")
        digest = summarize_with_llm(q, found)

        report = f"## AI-Generated Summary\n\n{digest}\n\n## Original Search Results\n\n{found}"
    except Exception as err:
        logger.exception(f"Error in perform_search_and_summarize: {str(err)}")
        return f"An error occurred: {str(err)}"
    return report
|
236 |
+
|
237 |
search_button.click(
|
238 |
perform_search_and_summarize,
|
239 |
inputs=[query, instance_url, categories, num_results, use_trafilatura, time_range, language, safesearch,
|
240 |
search_engines, sort_by, max_chars],
|
241 |
outputs=results
|
242 |
)
|
243 |
+
|
244 |
|
245 |
gr.Markdown(
|
246 |
"""
|
|
|
256 |
iface = create_gradio_interface()
|
257 |
|
258 |
if __name__ == "__main__":
|
259 |
+
logger.info("Starting the application")
|
260 |
iface.launch()
|