girishwangikar committed
Update app.py

app.py CHANGED
@@ -1,23 +1,30 @@
 import streamlit as st
 import pandas as pd
 from smolagents import CodeAgent, tool
-from typing import Union, List, Dict
+from typing import Union, List, Dict, Optional
 from duckduckgo_search import DDGS
 import requests
 from bs4 import BeautifulSoup
-from datetime import datetime
+from datetime import datetime
 from groq import Groq
 import os
 import re
+from dataclasses import dataclass
+
+@dataclass
+class SearchResult:
+    """Data class to store search results"""
+    title: str
+    link: str
+    date: str

 class GroqLLM:
     """Compatible LLM interface for smolagents CodeAgent"""
-    def __init__(self, model_name="llama-3.1-8B-Instant"):
+    def __init__(self, model_name: str = "llama-3.1-8B-Instant"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name

     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
-        """Make the class callable as required by smolagents"""
         try:
             prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
             completion = self.client.chat.completions.create(
@@ -40,16 +47,13 @@ class NewsAnalysisAgent(CodeAgent):

     @property
     def articles(self) -> List[Dict]:
-        """Access stored article data"""
         return self._articles

     @property
-    def search_results(self) -> List[Dict]:
-        """Access stored search results"""
+    def search_results(self) -> List[SearchResult]:
         return self._search_results

     def run(self, prompt: str) -> str:
-        """Override run method to include context about available tools"""
         enhanced_prompt = f"""
         You are a news analysis assistant that can:
         - Search for recent news articles
@@ -67,74 +71,84 @@ def extract_text_from_url(url: str) -> str:
     """Helper function to extract text content from a URL using BeautifulSoup"""
     try:
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
         }
         response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()

         soup = BeautifulSoup(response.text, 'html.parser')

-        # Remove unwanted elements
-        for element in soup(['script', 'style', 'nav', 'header', 'footer']):
+        # Remove unwanted elements
+        for element in soup(['script', 'style', 'nav', 'header', 'footer']):
             element.decompose()

         # Extract text from paragraphs
         paragraphs = soup.find_all('p')
         text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())

-
-        text = re.sub(r'\s+', ' ', text)
-        return text
+        return re.sub(r'\s+', ' ', text)

     except Exception as e:
         return f"Error extracting text: {str(e)}"

-@tool
-def search_news(query: str, max_results: int = 5) -> str:
-    """Search for recent news articles using DuckDuckGo.
-
-    Args:
-        query: Search query string
-        max_results: Maximum number of results to return
-
-    Returns:
-        str: Formatted search results
-    """
+@tool(
+    name="search_news",
+    description="Search for recent news articles using DuckDuckGo",
+    inputs={
+        "query": {
+            "type": "string",
+            "description": "Search query string"
+        },
+        "max_results": {
+            "type": "integer",
+            "description": "Maximum number of results to return",
+            "default": 5,
+            "nullable": True
+        }
+    }
+)
+def search_news(query: str, max_results: Optional[int] = 5) -> str:
+    """Search for recent news articles using DuckDuckGo."""
     try:
         with DDGS() as ddgs:
             search_results = list(ddgs.news(
                 query,
-                max_results=max_results,
-                timeframe='d'
+                max_results=max_results or 5,
+                timeframe='d'
             ))

         # Store results in agent
-        tool.agent._search_results = search_results
+        tool.agent._search_results = [
+            SearchResult(title=r['title'], link=r['link'], date=r['date'])
+            for r in search_results
+        ]

         # Format results
         formatted_results = []
         for idx, result in enumerate(search_results, 1):
-            formatted_results.append(f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n")
+            formatted_results.append(
+                f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n"
+            )

         return "\n".join(formatted_results)
     except Exception as e:
         return f"Error searching news: {str(e)}"

-@tool
+@tool(
+    name="analyze_article",
+    description="Extract and analyze content from a news article URL",
+    inputs={
+        "url": {
+            "type": "string",
+            "description": "URL of the news article to analyze"
+        }
+    }
+)
 def analyze_article(url: str) -> str:
-    """Extract and analyze content from a news article URL.
-
-    Args:
-        url: URL of the news article to analyze
-
-    Returns:
-        str: Analysis of the article including summary and key points
-    """
+    """Extract and analyze content from a news article URL."""
     try:
-        # Extract text content
         content = extract_text_from_url(url)

-        # Use LLM to generate summary and analysis
         analysis_prompt = f"""
         Please analyze this article content and provide:
         1. A brief summary (2-3 sentences)
@@ -142,7 +156,7 @@ def analyze_article(url: str) -> str:
         3. Main topics/themes discussed

         Article content:
-        {content[:3000]}
+        {content[:3000]}
         """

         analysis = tool.agent.model(analysis_prompt)
@@ -150,7 +164,7 @@ def analyze_article(url: str) -> str:
         # Store article data
         article_data = {
             'url': url,
-            'content': content[:1000],
+            'content': content[:1000],
             'analysis': analysis,
             'date': datetime.now().strftime('%Y-%m-%d')
         }
@@ -160,22 +174,24 @@ def analyze_article(url: str) -> str:
     except Exception as e:
         return f"Error analyzing article: {str(e)}"

-@tool
-def identify_trends(articles: List[Dict] = None) -> str:
-    """Identify common themes and trends across analyzed articles.
-
-    Args:
-        articles: List of analyzed articles
-
-    Returns:
-        str: Analysis of trends across articles
-    """
+@tool(
+    name="identify_trends",
+    description="Identify common themes and trends across analyzed articles",
+    inputs={
+        "articles": {
+            "type": "array",
+            "description": "List of analyzed articles",
+            "nullable": True
+        }
+    }
+)
+def identify_trends(articles: Optional[List[Dict]] = None) -> str:
+    """Identify common themes and trends across analyzed articles."""
     articles = articles or tool.agent._articles

     if not articles:
         return "No articles available for trend analysis"

-    # Combine all analyses for trend identification
     combined_analyses = "\n".join(article['analysis'] for article in articles)

     trend_prompt = f"""
@@ -230,14 +246,6 @@ def main():
                 "Use the identify_trends tool to analyze patterns across all articles"
             )
             st.write(trends)
-
-            # Custom analysis interface
-            st.subheader("Custom Analysis")
-            question = st.text_input("What would you like to know about the news?")
-            if question:
-                with st.spinner('Analyzing...'):
-                    result = st.session_state['agent'].run(question)
-                    st.write(result)

 if __name__ == "__main__":
     main()
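For reference, a minimal, self-contained sketch of the typed result handling this commit introduces: raw rows in the shape ddgs.news() returns (the sample rows here are made up) become SearchResult instances and are then formatted the way search_news reports them.

from dataclasses import dataclass

@dataclass
class SearchResult:
    """Data class to store search results"""
    title: str
    link: str
    date: str

# Hypothetical raw rows shaped like ddgs.news() output
raw_rows = [
    {'title': 'Example headline', 'link': 'https://example.com/a', 'date': '2025-01-01'},
    {'title': 'Another headline', 'link': 'https://example.com/b', 'date': '2025-01-01'},
]

results = [SearchResult(title=r['title'], link=r['link'], date=r['date']) for r in raw_rows]

# Mirror the formatting search_news builds from the raw rows
formatted = "\n".join(
    f"{idx}. {r.title}\n URL: {r.link}\n Date: {r.date}\n"
    for idx, r in enumerate(results, 1)
)
print(formatted)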
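The text-extraction path can be sanity-checked offline as well; a sketch against a made-up HTML string, exercising the same cleanup steps (decompose non-content tags, join paragraph text, collapse whitespace) whose result extract_text_from_url now returns directly:

import re
from bs4 import BeautifulSoup

html = """
<html><head><style>p { color: red }</style></head>
<body>
  <nav>menu</nav>
  <p>First   paragraph.</p>
  <script>var x = 1;</script>
  <p>Second
  paragraph.</p>
  <footer>footer</footer>
</body></html>
"""

soup = BeautifulSoup(html, 'html.parser')

# Same cleanup as extract_text_from_url: drop non-content elements
for element in soup(['script', 'style', 'nav', 'header', 'footer']):
    element.decompose()

# Join paragraph text, then collapse whitespace runs
paragraphs = soup.find_all('p')
text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
print(re.sub(r'\s+', ' ', text))  # -> "First paragraph. Second paragraph."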