Update app.py
Browse files
app.py
CHANGED
@@ -3,17 +3,12 @@ import pandas as pd
|
|
3 |
from smolagents import CodeAgent, tool
|
4 |
from typing import Union, List, Dict
|
5 |
from duckduckgo_search import DDGS
|
6 |
-
|
|
|
7 |
from datetime import datetime, timedelta
|
8 |
-
import nltk
|
9 |
from groq import Groq
|
10 |
import os
|
11 |
-
|
12 |
-
# Download required NLTK data
|
13 |
-
nltk.download('punkt')
|
14 |
-
nltk.download('averaged_perceptron_tagger')
|
15 |
-
nltk.download('maxent_ne_chunker')
|
16 |
-
nltk.download('words')
|
17 |
|
18 |
class GroqLLM:
|
19 |
"""Compatible LLM interface for smolagents CodeAgent"""
|
@@ -68,6 +63,32 @@ class NewsAnalysisAgent(CodeAgent):
|
|
68 |
"""
|
69 |
return super().run(enhanced_prompt)
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
@tool
|
72 |
def search_news(query: str, max_results: int = 5) -> str:
|
73 |
"""Search for recent news articles using DuckDuckGo.
|
@@ -107,37 +128,34 @@ def analyze_article(url: str) -> str:
|
|
107 |
url: URL of the news article to analyze
|
108 |
|
109 |
Returns:
|
110 |
-
str: Analysis of the article including summary
|
111 |
"""
|
112 |
try:
|
113 |
-
#
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
# Store article data
|
120 |
article_data = {
|
121 |
'url': url,
|
122 |
-
'
|
123 |
-
'
|
124 |
-
'
|
125 |
-
'publish_date': article.publish_date
|
126 |
}
|
127 |
tool.agent._articles.append(article_data)
|
128 |
|
129 |
-
# Format analysis
|
130 |
-
analysis = f"""
|
131 |
-
Title: {article.title}
|
132 |
-
|
133 |
-
Summary: {article.summary}
|
134 |
-
|
135 |
-
Key Points:
|
136 |
-
{', '.join(article.keywords)}
|
137 |
-
|
138 |
-
Publication Date: {article.publish_date}
|
139 |
-
"""
|
140 |
-
|
141 |
return analysis
|
142 |
except Exception as e:
|
143 |
return f"Error analyzing article: {str(e)}"
|
@@ -157,24 +175,20 @@ def identify_trends(articles: List[Dict] = None) -> str:
|
|
157 |
if not articles:
|
158 |
return "No articles available for trend analysis"
|
159 |
|
160 |
-
#
|
161 |
-
|
162 |
-
for article in articles:
|
163 |
-
all_keywords.extend(article.get('keywords', []))
|
164 |
-
|
165 |
-
# Count keyword frequencies
|
166 |
-
keyword_freq = pd.Series(all_keywords).value_counts()
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
Common
|
171 |
-
|
|
|
172 |
|
173 |
-
|
174 |
-
|
175 |
"""
|
176 |
|
177 |
-
return
|
178 |
|
179 |
def main():
|
180 |
st.title("News Analysis Assistant")
|
@@ -186,7 +200,7 @@ def main():
|
|
186 |
tools=[search_news, analyze_article, identify_trends],
|
187 |
model=GroqLLM(),
|
188 |
additional_authorized_imports=[
|
189 |
-
"
|
190 |
]
|
191 |
)
|
192 |
|
|
|
3 |
from smolagents import CodeAgent, tool
|
4 |
from typing import Union, List, Dict
|
5 |
from duckduckgo_search import DDGS
|
6 |
+
import requests
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
from datetime import datetime, timedelta
|
|
|
9 |
from groq import Groq
|
10 |
import os
|
11 |
+
import re
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
class GroqLLM:
|
14 |
"""Compatible LLM interface for smolagents CodeAgent"""
|
|
|
63 |
"""
|
64 |
return super().run(enhanced_prompt)
|
65 |
|
66 |
+
def extract_text_from_url(url: str) -> str:
    """Fetch a web page and return the text of its ``<p>`` elements.

    The page is requested with a browser-like ``User-Agent`` and a
    10-second timeout. ``script``, ``style``, ``nav``, ``header``,
    ``footer`` and ``aside`` elements are removed before the stripped
    text of every remaining paragraph is joined with single spaces.

    Args:
        url: Address of the page to download.

    Returns:
        The whitespace-normalised paragraph text (an empty string when
        the page has no non-blank paragraphs), or a message starting
        with ``"Error extracting text: "`` if anything goes wrong.
        This function never raises; callers that need to tell a failure
        from real content must check for that prefix.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the raw bytes rather than ``response.text``: requests
        # decodes text using the HTTP headers only and falls back to
        # ISO-8859-1 when no charset is sent, which garbles UTF-8 pages
        # that declare their encoding in a <meta> tag. BeautifulSoup
        # sniffs the document itself; the header charset is forwarded
        # only when the server actually declared one.
        content_type = response.headers.get('Content-Type', '')
        declared_encoding = (
            response.encoding if 'charset=' in content_type.lower() else None
        )
        soup = BeautifulSoup(
            response.content, 'html.parser', from_encoding=declared_encoding
        )

        # Remove scripts, styles, and navigation elements
        for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            element.decompose()

        # Extract text from paragraphs, computing each paragraph's text
        # once and skipping the ones that are blank after stripping.
        paragraph_texts = (p.get_text().strip() for p in soup.find_all('p'))
        text = ' '.join(chunk for chunk in paragraph_texts if chunk)

        # Basic cleaning: collapse newlines/tabs/runs of spaces.
        return re.sub(r'\s+', ' ', text)

    except Exception as e:
        # Deliberate best-effort boundary: failures are reported to the
        # caller as text instead of propagating.
        return f"Error extracting text: {e}"
|
91 |
+
|
92 |
@tool
|
93 |
def search_news(query: str, max_results: int = 5) -> str:
|
94 |
"""Search for recent news articles using DuckDuckGo.
|
|
|
128 |
url: URL of the news article to analyze
|
129 |
|
130 |
Returns:
|
131 |
+
str: Analysis of the article including summary and key points
|
132 |
"""
|
133 |
try:
|
134 |
+
# Extract text content
|
135 |
+
content = extract_text_from_url(url)
|
136 |
+
|
137 |
+
# Use LLM to generate summary and analysis
|
138 |
+
analysis_prompt = f"""
|
139 |
+
Please analyze this article content and provide:
|
140 |
+
1. A brief summary (2-3 sentences)
|
141 |
+
2. Key points (3-5 main takeaways)
|
142 |
+
3. Main topics/themes discussed
|
143 |
+
|
144 |
+
Article content:
|
145 |
+
{content[:3000]} # Limit content length for token constraints
|
146 |
+
"""
|
147 |
+
|
148 |
+
analysis = tool.agent.model(analysis_prompt)
|
149 |
|
150 |
# Store article data
|
151 |
article_data = {
|
152 |
'url': url,
|
153 |
+
'content': content[:1000], # Store truncated content
|
154 |
+
'analysis': analysis,
|
155 |
+
'date': datetime.now().strftime('%Y-%m-%d')
|
|
|
156 |
}
|
157 |
tool.agent._articles.append(article_data)
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
return analysis
|
160 |
except Exception as e:
|
161 |
return f"Error analyzing article: {str(e)}"
|
|
|
175 |
if not articles:
|
176 |
return "No articles available for trend analysis"
|
177 |
|
178 |
+
# Combine all analyses for trend identification
|
179 |
+
combined_analyses = "\n".join(article['analysis'] for article in articles)
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
+
trend_prompt = f"""
|
182 |
+
Based on the analyses of {len(articles)} articles, please identify:
|
183 |
+
1. Common themes or topics across articles
|
184 |
+
2. Any notable patterns or trends
|
185 |
+
3. Different perspectives or viewpoints presented
|
186 |
|
187 |
+
Combined analyses:
|
188 |
+
{combined_analyses}
|
189 |
"""
|
190 |
|
191 |
+
return tool.agent.model(trend_prompt)
|
192 |
|
193 |
def main():
|
194 |
st.title("News Analysis Assistant")
|
|
|
200 |
tools=[search_news, analyze_article, identify_trends],
|
201 |
model=GroqLLM(),
|
202 |
additional_authorized_imports=[
|
203 |
+
"requests", "bs4", "duckduckgo_search", "pandas"
|
204 |
]
|
205 |
)
|
206 |
|