girishwangikar committed on
Commit
fd1209e
·
verified ·
1 Parent(s): 5e6d4d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -45
app.py CHANGED
@@ -3,17 +3,12 @@ import pandas as pd
3
  from smolagents import CodeAgent, tool
4
  from typing import Union, List, Dict
5
  from duckduckgo_search import DDGS
6
- from newspaper import Article
 
7
  from datetime import datetime, timedelta
8
- import nltk
9
  from groq import Groq
10
  import os
11
-
12
- # Download required NLTK data
13
- nltk.download('punkt')
14
- nltk.download('averaged_perceptron_tagger')
15
- nltk.download('maxent_ne_chunker')
16
- nltk.download('words')
17
 
18
  class GroqLLM:
19
  """Compatible LLM interface for smolagents CodeAgent"""
@@ -68,6 +63,32 @@ class NewsAnalysisAgent(CodeAgent):
68
  """
69
  return super().run(enhanced_prompt)
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  @tool
72
  def search_news(query: str, max_results: int = 5) -> str:
73
  """Search for recent news articles using DuckDuckGo.
@@ -107,37 +128,34 @@ def analyze_article(url: str) -> str:
107
  url: URL of the news article to analyze
108
 
109
  Returns:
110
- str: Analysis of the article including summary, key points, and entities
111
  """
112
  try:
113
- # Download and parse article
114
- article = Article(url)
115
- article.download()
116
- article.parse()
117
- article.nlp()
 
 
 
 
 
 
 
 
 
 
118
 
119
  # Store article data
120
  article_data = {
121
  'url': url,
122
- 'title': article.title,
123
- 'summary': article.summary,
124
- 'keywords': article.keywords,
125
- 'publish_date': article.publish_date
126
  }
127
  tool.agent._articles.append(article_data)
128
 
129
- # Format analysis
130
- analysis = f"""
131
- Title: {article.title}
132
-
133
- Summary: {article.summary}
134
-
135
- Key Points:
136
- {', '.join(article.keywords)}
137
-
138
- Publication Date: {article.publish_date}
139
- """
140
-
141
  return analysis
142
  except Exception as e:
143
  return f"Error analyzing article: {str(e)}"
@@ -157,24 +175,20 @@ def identify_trends(articles: List[Dict] = None) -> str:
157
  if not articles:
158
  return "No articles available for trend analysis"
159
 
160
- # Collect all keywords
161
- all_keywords = []
162
- for article in articles:
163
- all_keywords.extend(article.get('keywords', []))
164
-
165
- # Count keyword frequencies
166
- keyword_freq = pd.Series(all_keywords).value_counts()
167
 
168
- # Format trends analysis
169
- trends = f"""
170
- Common Themes:
171
- {', '.join(keyword_freq.head().index)}
 
172
 
173
- Articles Analyzed: {len(articles)}
174
- Timespan: {min(a['publish_date'] for a in articles if a.get('publish_date'))} to {max(a['publish_date'] for a in articles if a.get('publish_date'))}
175
  """
176
 
177
- return trends
178
 
179
  def main():
180
  st.title("News Analysis Assistant")
@@ -186,7 +200,7 @@ def main():
186
  tools=[search_news, analyze_article, identify_trends],
187
  model=GroqLLM(),
188
  additional_authorized_imports=[
189
- "newspaper", "nltk", "duckduckgo_search", "pandas"
190
  ]
191
  )
192
 
 
3
  from smolagents import CodeAgent, tool
4
  from typing import Union, List, Dict
5
  from duckduckgo_search import DDGS
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
  from datetime import datetime, timedelta
 
9
  from groq import Groq
10
  import os
11
+ import re
 
 
 
 
 
12
 
13
  class GroqLLM:
14
  """Compatible LLM interface for smolagents CodeAgent"""
 
63
  """
64
  return super().run(enhanced_prompt)
65
 
66
def extract_text_from_url(url: str) -> str:
    """Fetch *url* and return the readable article text.

    Scripts, styles, and page chrome (nav/header/footer/aside) are removed,
    then only paragraph (<p>) text is kept and whitespace-normalized.

    Args:
        url: Address of the news article to scrape.

    Returns:
        str: The extracted text, or an ``"Error extracting text: ..."``
        message on any failure (network error, bad HTTP status, parse error).
    """
    try:
        # Present a desktop-browser UA: many news sites reject the default
        # python-requests client.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the raw bytes so BeautifulSoup can honor the document's own
        # charset declaration. response.text would decode via the HTTP header
        # only, and requests falls back to ISO-8859-1 when the header lacks a
        # charset — mojibake for non-ASCII articles.
        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove scripts, styles, and navigation elements.
        for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            element.decompose()

        # Keep only non-empty paragraph text.
        paragraphs = soup.find_all('p')
        text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())

        # Collapse all whitespace runs into single spaces.
        text = re.sub(r'\s+', ' ', text)
        return text

    except Exception as e:
        # Best-effort by design: callers feed the returned string (even an
        # error message) onward as article content rather than crashing.
        return f"Error extracting text: {str(e)}"
91
+
92
  @tool
93
  def search_news(query: str, max_results: int = 5) -> str:
94
  """Search for recent news articles using DuckDuckGo.
 
128
  url: URL of the news article to analyze
129
 
130
  Returns:
131
+ str: Analysis of the article including summary and key points
132
  """
133
  try:
134
+ # Extract text content
135
+ content = extract_text_from_url(url)
136
+
137
+ # Use LLM to generate summary and analysis
138
+ analysis_prompt = f"""
139
+ Please analyze this article content and provide:
140
+ 1. A brief summary (2-3 sentences)
141
+ 2. Key points (3-5 main takeaways)
142
+ 3. Main topics/themes discussed
143
+
144
+ Article content:
145
+ {content[:3000]} # Limit content length for token constraints
146
+ """
147
+
148
+ analysis = tool.agent.model(analysis_prompt)
149
 
150
  # Store article data
151
  article_data = {
152
  'url': url,
153
+ 'content': content[:1000], # Store truncated content
154
+ 'analysis': analysis,
155
+ 'date': datetime.now().strftime('%Y-%m-%d')
 
156
  }
157
  tool.agent._articles.append(article_data)
158
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  return analysis
160
  except Exception as e:
161
  return f"Error analyzing article: {str(e)}"
 
175
  if not articles:
176
  return "No articles available for trend analysis"
177
 
178
+ # Combine all analyses for trend identification
179
+ combined_analyses = "\n".join(article['analysis'] for article in articles)
 
 
 
 
 
180
 
181
+ trend_prompt = f"""
182
+ Based on the analyses of {len(articles)} articles, please identify:
183
+ 1. Common themes or topics across articles
184
+ 2. Any notable patterns or trends
185
+ 3. Different perspectives or viewpoints presented
186
 
187
+ Combined analyses:
188
+ {combined_analyses}
189
  """
190
 
191
+ return tool.agent.model(trend_prompt)
192
 
193
  def main():
194
  st.title("News Analysis Assistant")
 
200
  tools=[search_news, analyze_article, identify_trends],
201
  model=GroqLLM(),
202
  additional_authorized_imports=[
203
+ "requests", "bs4", "duckduckgo_search", "pandas"
204
  ]
205
  )
206