girishwangikar committed on
Commit
98ffa93
·
verified ·
1 Parent(s): 7e963f3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +229 -0
app.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from smolagents import CodeAgent, tool
4
+ from typing import Union, List, Dict
5
+ from duckduckgo_search import DDGS
6
+ from newspaper import Article
7
+ from datetime import datetime, timedelta
8
+ import nltk
9
+ from groq import Groq
10
+ import os
11
+
12
# Fetch the NLTK data packages required by the app, one download call per
# package, in a fixed order.
for _nltk_package in (
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(_nltk_package)
17
+
18
class GroqLLM:
    """Callable adapter that lets a smolagents ``CodeAgent`` query a Groq chat model.

    Every call sends the prompt to Groq's chat-completions endpoint as a single
    ``user`` message and returns the generated text as a plain string.
    """

    def __init__(self, model_name="llama-3.1-8B-Instant"):
        """Create the Groq client.

        Args:
            model_name: Identifier of the Groq-hosted model to query.
        """
        # The key comes from the environment; ``None`` is handed to the client
        # when GROQ_API_KEY is not set.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(
        self,
        prompt: Union[str, dict, List[Dict]],
        stop_sequences=None,
        **kwargs,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Prompt text, or a dict / list of message dicts. Non-string
                prompts are flattened with ``str()`` into one user message.
            stop_sequences: Optional sequences at which generation should stop.
                The agent framework passes this as a keyword; without the
                parameter the call failed with ``TypeError`` before reaching
                the API.
            **kwargs: Any other keyword arguments supplied by the caller.
                They are accepted for compatibility and ignored.

        Returns:
            The generated text, ``"Error: No response generated"`` when the
            API returns no choices, or an ``"Error generating response: ..."``
            string when the request raises.
        """
        try:
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            request = {
                "model": self.model_name,
                "messages": [{"role": "user", "content": prompt_str}],
                "temperature": 0.7,
                "max_tokens": 1024,
                "stream": False,
            }
            if stop_sequences:
                # The chat-completions API accepts at most four stop sequences.
                request["stop"] = list(stop_sequences)[:4]

            completion = self.client.chat.completions.create(**request)
            if not completion.choices:
                return "Error: No response generated"
            return completion.choices[0].message.content
        except Exception as e:
            # Best effort by design: the failure is reported as the model's
            # reply instead of propagating to the caller.
            return f"Error generating response: {str(e)}"
38
+
39
class NewsAnalysisAgent(CodeAgent):
    """CodeAgent subclass that keeps news search results and analysed articles.

    Two private lists are created on construction: ``_search_results`` and
    ``_articles``. They are exposed read-only through the ``search_results``
    and ``articles`` properties.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``CodeAgent`` and create empty stores."""
        super().__init__(*args, **kwargs)
        self._articles = []
        self._search_results = []

    @property
    def articles(self) -> List[Dict]:
        """Access stored article data"""
        return self._articles

    @property
    def search_results(self) -> List[Dict]:
        """Access stored search results"""
        return self._search_results

    def run(self, prompt: str, *args, **kwargs) -> str:
        """Run the agent on ``prompt`` wrapped in a news-assistant preamble.

        Args:
            prompt: The user's task; it is embedded after ``Task:`` in the
                preamble.
            *args: Extra positional arguments forwarded unchanged to
                ``CodeAgent.run``.
            **kwargs: Extra keyword arguments forwarded unchanged to
                ``CodeAgent.run``. Previously the override accepted only
                ``prompt``, so any base-class option raised ``TypeError``.

        Returns:
            Whatever ``CodeAgent.run`` returns for the enhanced prompt.
        """
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns

        Task: {prompt}

        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt, *args, **kwargs)
70
+
71
@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # DDGS.news takes the recency filter as `timelimit`; the previous
            # `timeframe` keyword made every search fail with TypeError.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d',  # Last 24 hours
            ))

        # Store results in the agent. `tool` is the decorator function and has
        # no `agent` attribute unless one was attached explicitly, so fall back
        # to the agent that main() keeps in the Streamlit session state.
        agent = getattr(tool, "agent", None) or st.session_state.get("agent")
        if agent is not None:
            agent._search_results = search_results

        if not search_results:
            return f"No news articles found for: {query}"

        # Format results. News results carry the address under 'url'; 'link'
        # is still accepted as a fallback.
        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            title = result.get('title', '')
            url = result.get('url') or result.get('link', '')
            date = result.get('date', '')
            formatted_results.append(f"{idx}. {title}\n URL: {url}\n Date: {date}\n")

        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"
101
+
102
@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary, key points, and entities
    """
    try:
        # Download and parse article
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()

        # Store article data
        article_data = {
            'url': url,
            'title': article.title,
            'summary': article.summary,
            'keywords': article.keywords,
            'publish_date': article.publish_date
        }

        # `tool` is the decorator function and has no `agent` attribute unless
        # one was attached explicitly, so fall back to the agent that main()
        # keeps in the Streamlit session state. Previously this lookup raised
        # AttributeError and every analysis was reported as an error.
        agent = getattr(tool, "agent", None) or st.session_state.get("agent")
        if agent is not None:
            # Replace an earlier record of the same URL so repeated analyses
            # do not count its keywords twice.
            agent._articles[:] = [
                stored for stored in agent._articles if stored.get('url') != url
            ]
            agent._articles.append(article_data)

        # Format analysis
        analysis = f"""
        Title: {article.title}

        Summary: {article.summary}

        Key Points:
        {', '.join(article.keywords)}

        Publication Date: {article.publish_date}
        """

        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"
144
+
145
@tool
def identify_trends(articles: List[Dict] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles
    """
    if not articles:
        # `tool` is the decorator function and has no `agent` attribute unless
        # one was attached explicitly, so fall back to the agent that main()
        # keeps in the Streamlit session state. Previously this lookup raised
        # AttributeError.
        agent = getattr(tool, "agent", None) or st.session_state.get("agent")
        articles = getattr(agent, "_articles", None)

    if not articles:
        return "No articles available for trend analysis"

    # Collect all keywords (a missing or None 'keywords' entry counts as empty)
    all_keywords = [
        keyword
        for article in articles
        for keyword in (article.get('keywords') or [])
    ]

    # Count keyword frequencies
    keyword_freq = pd.Series(all_keywords, dtype=object).value_counts()

    # min()/max() raise ValueError on an empty sequence and TypeError on dates
    # that cannot be compared, so compute the span defensively.
    publish_dates = [a['publish_date'] for a in articles if a.get('publish_date')]
    try:
        timespan = f"{min(publish_dates)} to {max(publish_dates)}" if publish_dates else "unknown"
    except TypeError:
        timespan = "unknown"

    # Format trends analysis
    trends = f"""
    Common Themes:
    {', '.join(map(str, keyword_freq.head().index))}

    Articles Analyzed: {len(articles)}
    Timespan: {timespan}
    """

    return trends
178
+
179
def _run_agent_once(slot: str, user_input: str, task: str, spinner_text: str):
    """Return the agent's answer for ``task``, re-running only when ``user_input`` changed.

    Streamlit re-executes the whole script on every widget interaction. Without
    this cache each non-empty text box would trigger a fresh agent run on every
    rerun, including reruns caused by unrelated widgets.
    """
    if '_agent_results' not in st.session_state:
        st.session_state['_agent_results'] = {}
    results = st.session_state['_agent_results']

    cached = results.get(slot)
    if cached is None or cached[0] != user_input:
        with st.spinner(spinner_text):
            answer = st.session_state['agent'].run(task)
        cached = (user_input, answer)
        results[slot] = cached
    return cached[1]


def main():
    """Render the Streamlit page: news search, article analysis, trends and free-form questions."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Initialize session state: one agent per browser session, created on the
    # first run and reused on later reruns.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "newspaper", "nltk", "duckduckgo_search", "pandas"
            ]
        )

    # News search interface
    search_query = st.text_input("Enter news search query:")
    if search_query:
        search_results = _run_agent_once(
            'search',
            search_query,
            f"Use the search_news tool to find recent articles about: {search_query}",
            'Searching news...',
        )
        st.write(search_results)

    # Article analysis interface
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        analysis = _run_agent_once(
            'article',
            article_url,
            f"Use the analyze_article tool to analyze this article: {article_url}",
            'Analyzing article...',
        )
        st.write(analysis)

    # Trend analysis interface: runs on each click of the button, so it is
    # deliberately not cached.
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            trends = st.session_state['agent'].run(
                "Use the identify_trends tool to analyze patterns across all articles"
            )
        st.write(trends)

    # Custom analysis interface
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        result = _run_agent_once('custom', question, question, 'Analyzing...')
        st.write(result)
228
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()