"""Streamlit news-analysis assistant.

Searches recent news via DuckDuckGo, extracts article content with
newspaper3k, and drives the workflow with a smolagents CodeAgent backed
by a Groq-hosted LLM.
"""

import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Union

import nltk
import pandas as pd
import streamlit as st
from duckduckgo_search import DDGS
from groq import Groq
from newspaper import Article
from smolagents import CodeAgent, tool

# Download required NLTK data (newspaper's .nlp() needs the tokenizers).
# quiet=True so Streamlit's rerun-on-every-interaction doesn't spam the log.
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('maxent_ne_chunker', quiet=True)
nltk.download('words', quiet=True)

# Shared stores used by the @tool functions and exposed by the agent.
# BUG FIX: the original tools referenced `tool.agent._articles` /
# `tool.agent._search_results`, but the smolagents @tool decorator never
# attaches an `.agent` attribute, so every tool call raised AttributeError.
# The tools and the agent's properties now share these module-level lists.
_SEARCH_RESULTS: List[Dict] = []
_ARTICLES: List[Dict] = []


class GroqLLM:
    """Callable LLM wrapper compatible with the smolagents CodeAgent."""

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # BUG FIX: Groq model IDs are lowercase; "llama-3.1-8B-Instant"
        # is rejected by the API.  Requires GROQ_API_KEY in the environment.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Send *prompt* to the Groq chat API and return the reply text.

        Non-string prompts are stringified.  API failures are returned as
        error strings rather than raised so the agent loop keeps running.
        """
        try:
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            if completion.choices:
                return completion.choices[0].message.content
            return "Error: No response generated"
        except Exception as e:
            return f"Error generating response: {str(e)}"


class NewsAnalysisAgent(CodeAgent):
    """Extended CodeAgent with news search and analysis capabilities."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def articles(self) -> List[Dict]:
        """Article data accumulated by the analyze_article tool."""
        return _ARTICLES

    @property
    def search_results(self) -> List[Dict]:
        """Results stored by the most recent search_news call."""
        return _SEARCH_RESULTS

    def run(self, prompt: str) -> str:
        """Run the agent with a preamble describing the available tools."""
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns

        Task: {prompt}

        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt)


@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # BUG FIX: the DDGS.news keyword is `timelimit`, not `timeframe`
            # (the original raised TypeError).  'd' = last 24 hours.
            results = list(ddgs.news(query, max_results=max_results, timelimit='d'))

        # Replace the shared store with the latest search results.
        _SEARCH_RESULTS[:] = results

        formatted_results = [
            f"{idx}. {item['title']}\n URL: {item['link']}\n Date: {item['date']}\n"
            for idx, item in enumerate(results, 1)
        ]
        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"


@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary, key points, and entities
    """
    try:
        # Download and parse the article; .nlp() fills summary/keywords
        # and depends on the NLTK data downloaded at module load.
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()

        # Store article data for later trend analysis.
        _ARTICLES.append({
            'url': url,
            'title': article.title,
            'summary': article.summary,
            'keywords': article.keywords,
            'publish_date': article.publish_date,
        })

        return f"""
        Title: {article.title}

        Summary: {article.summary}

        Key Points: {', '.join(article.keywords)}

        Publication Date: {article.publish_date}
        """
    except Exception as e:
        return f"Error analyzing article: {str(e)}"


@tool
def identify_trends(articles: Optional[List[Dict]] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles
    """
    articles = articles or _ARTICLES
    if not articles:
        return "No articles available for trend analysis"

    # Collect all keywords across articles and rank by frequency.
    all_keywords: List[str] = []
    for article in articles:
        all_keywords.extend(article.get('keywords', []))
    keyword_freq = pd.Series(all_keywords).value_counts()

    # BUG FIX: min()/max() over an empty generator raise ValueError when
    # no article carries a publish_date; fall back to "unknown" instead.
    dates = [a['publish_date'] for a in articles if a.get('publish_date')]
    timespan = f"{min(dates)} to {max(dates)}" if dates else "unknown"

    return f"""
    Common Themes: {', '.join(keyword_freq.head().index)}

    Articles Analyzed: {len(articles)}

    Timespan: {timespan}
    """


def main():
    """Streamlit entry point: wire the agent to the search/analysis UI."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Streamlit reruns this script on every interaction, so the agent must
    # live in session_state to persist across reruns.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "newspaper", "nltk", "duckduckgo_search", "pandas"
            ],
        )

    # News search interface
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            search_results = st.session_state['agent'].run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            )
            st.write(search_results)

    # Article analysis interface
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            analysis = st.session_state['agent'].run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            )
            st.write(analysis)

    # Trend analysis interface
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            trends = st.session_state['agent'].run(
                "Use the identify_trends tool to analyze patterns across all articles"
            )
            st.write(trends)

    # Custom analysis interface
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        with st.spinner('Analyzing...'):
            result = st.session_state['agent'].run(question)
            st.write(result)


if __name__ == "__main__":
    main()