girishwangikar committed on
Commit
91818a9
·
verified ·
1 Parent(s): e4fc134

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -61
app.py CHANGED
@@ -1,23 +1,30 @@
1
  import streamlit as st
2
  import pandas as pd
3
  from smolagents import CodeAgent, tool
4
- from typing import Union, List, Dict
5
  from duckduckgo_search import DDGS
6
  import requests
7
  from bs4 import BeautifulSoup
8
- from datetime import datetime, timedelta
9
  from groq import Groq
10
  import os
11
  import re
 
 
 
 
 
 
 
 
12
 
13
  class GroqLLM:
14
  """Compatible LLM interface for smolagents CodeAgent"""
15
- def __init__(self, model_name="llama-3.1-8B-Instant"):
16
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
  self.model_name = model_name
18
 
19
  def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
20
- """Make the class callable as required by smolagents"""
21
  try:
22
  prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
23
  completion = self.client.chat.completions.create(
@@ -40,16 +47,13 @@ class NewsAnalysisAgent(CodeAgent):
40
 
41
  @property
42
  def articles(self) -> List[Dict]:
43
- """Access stored article data"""
44
  return self._articles
45
 
46
  @property
47
- def search_results(self) -> List[Dict]:
48
- """Access stored search results"""
49
  return self._search_results
50
 
51
  def run(self, prompt: str) -> str:
52
- """Override run method to include context about available tools"""
53
  enhanced_prompt = f"""
54
  You are a news analysis assistant that can:
55
  - Search for recent news articles
@@ -67,74 +71,84 @@ def extract_text_from_url(url: str) -> str:
67
  """Helper function to extract text content from a URL using BeautifulSoup"""
68
  try:
69
  headers = {
70
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
71
  }
72
  response = requests.get(url, headers=headers, timeout=10)
73
  response.raise_for_status()
74
 
75
  soup = BeautifulSoup(response.text, 'html.parser')
76
 
77
- # Remove scripts, styles, and navigation elements
78
- for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
79
  element.decompose()
80
 
81
  # Extract text from paragraphs
82
  paragraphs = soup.find_all('p')
83
  text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
84
 
85
- # Basic cleaning
86
- text = re.sub(r'\s+', ' ', text)
87
- return text
88
 
89
  except Exception as e:
90
  return f"Error extracting text: {str(e)}"
91
 
92
- @tool
93
- def search_news(query: str, max_results: int = 5) -> str:
94
- """Search for recent news articles using DuckDuckGo.
95
-
96
- Args:
97
- query: Search query string
98
- max_results: Maximum number of results to return
99
-
100
- Returns:
101
- str: Formatted string containing search results with titles and URLs
102
- """
 
 
 
 
 
 
 
103
  try:
104
  with DDGS() as ddgs:
105
  search_results = list(ddgs.news(
106
  query,
107
- max_results=max_results,
108
- timeframe='d' # Last 24 hours
109
  ))
110
 
111
  # Store results in agent
112
- tool.agent._search_results = search_results
 
 
 
113
 
114
  # Format results
115
  formatted_results = []
116
  for idx, result in enumerate(search_results, 1):
117
- formatted_results.append(f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n")
 
 
118
 
119
  return "\n".join(formatted_results)
120
  except Exception as e:
121
  return f"Error searching news: {str(e)}"
122
 
123
- @tool
 
 
 
 
 
 
 
 
 
124
  def analyze_article(url: str) -> str:
125
- """Extract and analyze content from a news article URL.
126
-
127
- Args:
128
- url: URL of the news article to analyze
129
-
130
- Returns:
131
- str: Analysis of the article including summary and key points
132
- """
133
  try:
134
- # Extract text content
135
  content = extract_text_from_url(url)
136
 
137
- # Use LLM to generate summary and analysis
138
  analysis_prompt = f"""
139
  Please analyze this article content and provide:
140
  1. A brief summary (2-3 sentences)
@@ -142,7 +156,7 @@ def analyze_article(url: str) -> str:
142
  3. Main topics/themes discussed
143
 
144
  Article content:
145
- {content[:3000]} # Limit content length for token constraints
146
  """
147
 
148
  analysis = tool.agent.model(analysis_prompt)
@@ -150,7 +164,7 @@ def analyze_article(url: str) -> str:
150
  # Store article data
151
  article_data = {
152
  'url': url,
153
- 'content': content[:1000], # Store truncated content
154
  'analysis': analysis,
155
  'date': datetime.now().strftime('%Y-%m-%d')
156
  }
@@ -160,22 +174,24 @@ def analyze_article(url: str) -> str:
160
  except Exception as e:
161
  return f"Error analyzing article: {str(e)}"
162
 
163
- @tool
164
- def identify_trends(articles: List[Dict] = None) -> str:
165
- """Identify common themes and trends across analyzed articles.
166
-
167
- Args:
168
- articles: List of analyzed article data (optional, uses stored articles if None)
169
-
170
- Returns:
171
- str: Analysis of trends and patterns found across articles
172
- """
 
 
 
173
  articles = articles or tool.agent._articles
174
 
175
  if not articles:
176
  return "No articles available for trend analysis"
177
 
178
- # Combine all analyses for trend identification
179
  combined_analyses = "\n".join(article['analysis'] for article in articles)
180
 
181
  trend_prompt = f"""
@@ -230,14 +246,6 @@ def main():
230
  "Use the identify_trends tool to analyze patterns across all articles"
231
  )
232
  st.write(trends)
233
-
234
- # Custom analysis interface
235
- st.subheader("Custom Analysis")
236
- question = st.text_input("What would you like to know about the news?")
237
- if question:
238
- with st.spinner('Analyzing...'):
239
- result = st.session_state['agent'].run(question)
240
- st.write(result)
241
 
242
  if __name__ == "__main__":
243
  main()
 
1
  import streamlit as st
2
  import pandas as pd
3
  from smolagents import CodeAgent, tool
4
+ from typing import Union, List, Dict, Optional
5
  from duckduckgo_search import DDGS
6
  import requests
7
  from bs4 import BeautifulSoup
8
+ from datetime import datetime
9
  from groq import Groq
10
  import os
11
  import re
12
+ from dataclasses import dataclass
13
+
14
+ @dataclass
15
+ class SearchResult:
16
+ """Data class to store search results"""
17
+ title: str
18
+ link: str
19
+ date: str
20
 
21
  class GroqLLM:
22
  """Compatible LLM interface for smolagents CodeAgent"""
23
+ def __init__(self, model_name: str = "llama-3.1-8B-Instant"):
24
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
25
  self.model_name = model_name
26
 
27
  def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
 
28
  try:
29
  prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
30
  completion = self.client.chat.completions.create(
 
47
 
48
  @property
49
  def articles(self) -> List[Dict]:
 
50
  return self._articles
51
 
52
  @property
53
+ def search_results(self) -> List[SearchResult]:
 
54
  return self._search_results
55
 
56
  def run(self, prompt: str) -> str:
 
57
  enhanced_prompt = f"""
58
  You are a news analysis assistant that can:
59
  - Search for recent news articles
 
71
  """Helper function to extract text content from a URL using BeautifulSoup"""
72
  try:
73
  headers = {
74
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
75
  }
76
  response = requests.get(url, headers=headers, timeout=10)
77
  response.raise_for_status()
78
 
79
  soup = BeautifulSoup(response.text, 'html.parser')
80
 
81
+ # Remove unwanted elements
82
+ for element in soup(['script', 'style', 'nav', 'header', 'footer']):
83
  element.decompose()
84
 
85
  # Extract text from paragraphs
86
  paragraphs = soup.find_all('p')
87
  text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
88
 
89
+ return re.sub(r'\s+', ' ', text)
 
 
90
 
91
  except Exception as e:
92
  return f"Error extracting text: {str(e)}"
93
 
94
+ @tool(
95
+ name="search_news",
96
+ description="Search for recent news articles using DuckDuckGo",
97
+ inputs={
98
+ "query": {
99
+ "type": "string",
100
+ "description": "Search query string"
101
+ },
102
+ "max_results": {
103
+ "type": "integer",
104
+ "description": "Maximum number of results to return",
105
+ "default": 5,
106
+ "nullable": True
107
+ }
108
+ }
109
+ )
110
+ def search_news(query: str, max_results: Optional[int] = 5) -> str:
111
+ """Search for recent news articles using DuckDuckGo."""
112
  try:
113
  with DDGS() as ddgs:
114
  search_results = list(ddgs.news(
115
  query,
116
+ max_results=max_results or 5,
117
+ timeframe='d'
118
  ))
119
 
120
  # Store results in agent
121
+ tool.agent._search_results = [
122
+ SearchResult(title=r['title'], link=r['link'], date=r['date'])
123
+ for r in search_results
124
+ ]
125
 
126
  # Format results
127
  formatted_results = []
128
  for idx, result in enumerate(search_results, 1):
129
+ formatted_results.append(
130
+ f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n"
131
+ )
132
 
133
  return "\n".join(formatted_results)
134
  except Exception as e:
135
  return f"Error searching news: {str(e)}"
136
 
137
+ @tool(
138
+ name="analyze_article",
139
+ description="Extract and analyze content from a news article URL",
140
+ inputs={
141
+ "url": {
142
+ "type": "string",
143
+ "description": "URL of the news article to analyze"
144
+ }
145
+ }
146
+ )
147
  def analyze_article(url: str) -> str:
148
+ """Extract and analyze content from a news article URL."""
 
 
 
 
 
 
 
149
  try:
 
150
  content = extract_text_from_url(url)
151
 
 
152
  analysis_prompt = f"""
153
  Please analyze this article content and provide:
154
  1. A brief summary (2-3 sentences)
 
156
  3. Main topics/themes discussed
157
 
158
  Article content:
159
+ {content[:3000]}
160
  """
161
 
162
  analysis = tool.agent.model(analysis_prompt)
 
164
  # Store article data
165
  article_data = {
166
  'url': url,
167
+ 'content': content[:1000],
168
  'analysis': analysis,
169
  'date': datetime.now().strftime('%Y-%m-%d')
170
  }
 
174
  except Exception as e:
175
  return f"Error analyzing article: {str(e)}"
176
 
177
+ @tool(
178
+ name="identify_trends",
179
+ description="Identify common themes and trends across analyzed articles",
180
+ inputs={
181
+ "articles": {
182
+ "type": "array",
183
+ "description": "List of analyzed articles",
184
+ "nullable": True
185
+ }
186
+ }
187
+ )
188
+ def identify_trends(articles: Optional[List[Dict]] = None) -> str:
189
+ """Identify common themes and trends across analyzed articles."""
190
  articles = articles or tool.agent._articles
191
 
192
  if not articles:
193
  return "No articles available for trend analysis"
194
 
 
195
  combined_analyses = "\n".join(article['analysis'] for article in articles)
196
 
197
  trend_prompt = f"""
 
246
  "Use the identify_trends tool to analyze patterns across all articles"
247
  )
248
  st.write(trends)
 
 
 
 
 
 
 
 
249
 
250
  if __name__ == "__main__":
251
  main()