"""Streamlit app for searching, extracting, and analyzing recent news articles
using a smolagents CodeAgent backed by a Groq-hosted LLM."""

import streamlit as st
import pandas as pd
from smolagents import CodeAgent, tool
from typing import Union, List, Dict, Optional
from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from groq import Groq
import os
import re
from dataclasses import dataclass


@dataclass
class SearchResult:
    """Data class to store search results from news searches."""
    title: str  # article headline
    link: str   # article URL
    date: str   # publication date string as returned by the search backend


class GroqLLM:
    """Callable LLM wrapper compatible with the smolagents CodeAgent `model` slot."""

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # NOTE: Groq model ids are lowercase ("llama-3.1-8b-instant"); the
        # previous "llama-3.1-8B-Instant" is not a valid model id.
        # Reads GROQ_API_KEY from the environment; Groq() raises if absent.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Send `prompt` to the Groq chat-completions API and return the text reply.

        Non-string prompts are stringified; errors are returned as text rather
        than raised so agent runs degrade gracefully.
        """
        try:
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            if completion.choices:
                return completion.choices[0].message.content
            return "Error: No response generated"
        except Exception as e:
            return f"Error generating response: {str(e)}"


# Module-level handle to the most recently created agent.
# BUG FIX: the original tools read `tool.agent`, but `tool` is the smolagents
# decorator function and has no `agent` attribute, so every tool raised
# AttributeError at runtime.  Tools now reach the agent through this handle.
_active_agent: Optional["NewsAnalysisAgent"] = None


class NewsAnalysisAgent(CodeAgent):
    """Extended CodeAgent with news search and analysis capabilities."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._articles: List[Dict] = []              # analyzed-article records
        self._search_results: List[SearchResult] = []  # last search's results
        # Register this instance so the @tool functions can reach it.
        global _active_agent
        _active_agent = self

    @property
    def articles(self) -> List[Dict]:
        """Access stored article data."""
        return self._articles

    @property
    def search_results(self) -> List[SearchResult]:
        """Access stored search results."""
        return self._search_results

    def run(self, prompt: str) -> str:
        """Execute the agent on `prompt`, wrapped with a news-analysis preamble."""
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns

        Task: {prompt}

        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt)


def extract_text_from_url(url: str) -> str:
    """Extract main text content from a given URL using BeautifulSoup.

    Args:
        url: The URL of the webpage to extract text from

    Returns:
        str: Extracted and cleaned text content from the webpage, or an
        error message string on failure.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove unwanted elements (scripts, styling, page chrome).
        for element in soup(['script', 'style', 'nav', 'header', 'footer']):
            element.decompose()

        # Extract text from paragraphs only — skips menus, captions, etc.
        paragraphs = soup.find_all('p')
        text = ' '.join(
            p.get_text().strip() for p in paragraphs if p.get_text().strip()
        )
        # Collapse runs of whitespace into single spaces.
        return re.sub(r'\s+', ' ', text)
    except Exception as e:
        return f"Error extracting text: {str(e)}"


@tool
def search_news(query: str, max_results: Optional[int] = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string to find relevant news articles
        max_results: Maximum number of results to return (default: 5)

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # BUG FIX: the DDGS.news keyword is `timelimit`, not `timeframe`;
            # 'd' restricts results to the past day.
            search_results = list(ddgs.news(
                query,
                max_results=max_results or 5,
                timelimit='d',
            ))

        if not search_results:
            return "No news results found."

        # Store results on the active agent, if one has been created.
        if _active_agent is not None:
            _active_agent._search_results = [
                SearchResult(
                    title=r.get('title', ''),
                    link=r.get('link', ''),
                    date=r.get('date', ''),
                )
                for r in search_results
            ]

        # Format results as a numbered, human-readable list.
        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            formatted_results.append(
                f"{idx}. {result.get('title', '')}\n"
                f"   URL: {result.get('link', '')}\n"
                f"   Date: {result.get('date', '')}\n"
            )
        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"


@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary, key points, and main themes
    """
    try:
        if _active_agent is None:
            return "Error: no active agent available for analysis"

        content = extract_text_from_url(url)

        analysis_prompt = f"""
        Please analyze this article content and provide:
        1. A brief summary (2-3 sentences)
        2. Key points (3-5 main takeaways)
        3. Main topics/themes discussed

        Article content:
        {content[:3000]}
        """
        # Delegate the analysis itself to the agent's LLM.
        analysis = _active_agent.model(analysis_prompt)

        # Store article data (truncated content to bound memory use).
        article_data = {
            'url': url,
            'content': content[:1000],
            'analysis': analysis,
            'date': datetime.now().strftime('%Y-%m-%d'),
        }
        _active_agent._articles.append(article_data)

        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"


@tool
def identify_trends(articles: Optional[List[Dict]] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: Optional list of analyzed article data. If None, uses stored articles.

    Returns:
        str: Analysis of trends and patterns found across the articles
    """
    if articles is None:
        articles = _active_agent._articles if _active_agent is not None else []

    if not articles:
        return "No articles available for trend analysis"

    if _active_agent is None:
        return "Error: no active agent available for analysis"

    combined_analyses = "\n".join(article['analysis'] for article in articles)

    trend_prompt = f"""
    Based on the analyses of {len(articles)} articles, please identify:
    1. Common themes or topics across articles
    2. Any notable patterns or trends
    3. Different perspectives or viewpoints presented

    Combined analyses:
    {combined_analyses}
    """
    return _active_agent.model(trend_prompt)


def main():
    """Streamlit entry point: wires the search/analyze/trends UI to the agent."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Initialize session state (the agent survives Streamlit re-runs).
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "requests", "bs4", "duckduckgo_search", "pandas"
            ],
        )

    # News search interface
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            search_results = st.session_state['agent'].run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            )
            st.write(search_results)

    # Article analysis interface
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            analysis = st.session_state['agent'].run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            )
            st.write(analysis)

    # Trend analysis interface
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            trends = st.session_state['agent'].run(
                "Use the identify_trends tool to analyze patterns across all articles"
            )
            st.write(trends)


if __name__ == "__main__":
    main()