girishwangikar committed
Update app.py

app.py CHANGED
@@ -1,23 +1,30 @@
 import streamlit as st
 import pandas as pd
 from smolagents import CodeAgent, tool
-from typing import Union, List, Dict
+from typing import Union, List, Dict, Optional
 from duckduckgo_search import DDGS
 import requests
 from bs4 import BeautifulSoup
-from datetime import datetime
+from datetime import datetime
 from groq import Groq
 import os
 import re
+from dataclasses import dataclass
+
+@dataclass
+class SearchResult:
+    """Data class to store search results"""
+    title: str
+    link: str
+    date: str

 class GroqLLM:
     """Compatible LLM interface for smolagents CodeAgent"""
-    def __init__(self, model_name="llama-3.1-8B-Instant"):
+    def __init__(self, model_name: str = "llama-3.1-8B-Instant"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name

     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
-        """Make the class callable as required by smolagents"""
         try:
             prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
             completion = self.client.chat.completions.create(
@@ -40,16 +47,13 @@ class NewsAnalysisAgent(CodeAgent):

     @property
     def articles(self) -> List[Dict]:
-        """Access stored article data"""
         return self._articles

     @property
-    def search_results(self) -> List[Dict]:
-        """Access stored search results"""
+    def search_results(self) -> List[SearchResult]:
         return self._search_results

     def run(self, prompt: str) -> str:
-        """Override run method to include context about available tools"""
         enhanced_prompt = f"""
         You are a news analysis assistant that can:
         - Search for recent news articles
@@ -67,74 +71,84 @@ def extract_text_from_url(url: str) -> str:
     """Helper function to extract text content from a URL using BeautifulSoup"""
     try:
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
         }
         response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()

         soup = BeautifulSoup(response.text, 'html.parser')

-        # Remove unwanted elements
-        for element in soup(['script', 'style', 'nav', 'header', 'footer']):
+        # Remove unwanted elements
+        for element in soup(['script', 'style', 'nav', 'header', 'footer']):
             element.decompose()

         # Extract text from paragraphs
         paragraphs = soup.find_all('p')
         text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())

-
-        text = re.sub(r'\s+', ' ', text)
-        return text
+        return re.sub(r'\s+', ' ', text)

     except Exception as e:
         return f"Error extracting text: {str(e)}"

-@tool
-def search_news(query: str, max_results: int = 5) -> str:
-    """Search for recent news articles using DuckDuckGo.
-
-    Args:
-        query: Search query string
-        max_results: Maximum number of results to return
-
-    Returns:
-        str: Formatted search results
-    """
+@tool(
+    name="search_news",
+    description="Search for recent news articles using DuckDuckGo",
+    inputs={
+        "query": {
+            "type": "string",
+            "description": "Search query string"
+        },
+        "max_results": {
+            "type": "integer",
+            "description": "Maximum number of results to return",
+            "default": 5,
+            "nullable": True
+        }
+    }
+)
+def search_news(query: str, max_results: Optional[int] = 5) -> str:
+    """Search for recent news articles using DuckDuckGo."""
     try:
         with DDGS() as ddgs:
             search_results = list(ddgs.news(
                 query,
-                max_results=max_results,
-                timeframe='d'
+                max_results=max_results or 5,
+                timeframe='d'
             ))

         # Store results in agent
-        tool.agent._search_results = search_results
+        tool.agent._search_results = [
+            SearchResult(title=r['title'], link=r['link'], date=r['date'])
+            for r in search_results
+        ]

         # Format results
         formatted_results = []
         for idx, result in enumerate(search_results, 1):
-            formatted_results.append(f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n")
+            formatted_results.append(
+                f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n"
+            )

         return "\n".join(formatted_results)
     except Exception as e:
         return f"Error searching news: {str(e)}"

-@tool
+@tool(
+    name="analyze_article",
+    description="Extract and analyze content from a news article URL",
+    inputs={
+        "url": {
+            "type": "string",
+            "description": "URL of the news article to analyze"
+        }
+    }
+)
 def analyze_article(url: str) -> str:
-    """Extract and analyze content from a news article URL.
-
-    Args:
-        url: URL of the news article to analyze
-
-    Returns:
-        str: Analysis of the article including summary and key points
-    """
+    """Extract and analyze content from a news article URL."""
     try:
-        # Extract text content
         content = extract_text_from_url(url)

-        # Use LLM to generate summary and analysis
         analysis_prompt = f"""
         Please analyze this article content and provide:
         1. A brief summary (2-3 sentences)
@@ -142,7 +156,7 @@ def analyze_article(url: str) -> str:
         3. Main topics/themes discussed

         Article content:
-        {content[:3000]}
+        {content[:3000]}
         """

         analysis = tool.agent.model(analysis_prompt)
@@ -150,7 +164,7 @@ def analyze_article(url: str) -> str:
         # Store article data
         article_data = {
             'url': url,
-            'content': content[:1000],
+            'content': content[:1000],
             'analysis': analysis,
             'date': datetime.now().strftime('%Y-%m-%d')
         }
@@ -160,22 +174,24 @@ def analyze_article(url: str) -> str:
     except Exception as e:
         return f"Error analyzing article: {str(e)}"

-@tool
-def identify_trends(articles: List[Dict] = None) -> str:
-    """Identify common themes and trends across analyzed articles.
-
-    Args:
-        articles: List of analyzed articles
-
-    Returns:
-        str: Analysis of trends across articles
-    """
+@tool(
+    name="identify_trends",
+    description="Identify common themes and trends across analyzed articles",
+    inputs={
+        "articles": {
+            "type": "array",
+            "description": "List of analyzed articles",
+            "nullable": True
+        }
+    }
+)
+def identify_trends(articles: Optional[List[Dict]] = None) -> str:
+    """Identify common themes and trends across analyzed articles."""
     articles = articles or tool.agent._articles

     if not articles:
         return "No articles available for trend analysis"

-    # Combine all analyses for trend identification
     combined_analyses = "\n".join(article['analysis'] for article in articles)

     trend_prompt = f"""
@@ -230,14 +246,6 @@ def main():
                 "Use the identify_trends tool to analyze patterns across all articles"
             )
             st.write(trends)
-
-            # Custom analysis interface
-            st.subheader("Custom Analysis")
-            question = st.text_input("What would you like to know about the news?")
-            if question:
-                with st.spinner('Analyzing...'):
-                    result = st.session_state['agent'].run(question)
-                    st.write(result)

 if __name__ == "__main__":
     main()
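For reference, a minimal, self-contained sketch of the typed result handling this commit introduces: raw rows in the shape ddgs.news() returns (the sample rows here are made up) become SearchResult instances and are then formatted the way search_news reports them.

from dataclasses import dataclass

@dataclass
class SearchResult:
    """Data class to store search results"""
    title: str
    link: str
    date: str

# Hypothetical raw rows shaped like ddgs.news() output
raw_rows = [
    {'title': 'Example headline', 'link': 'https://example.com/a', 'date': '2025-01-01'},
    {'title': 'Another headline', 'link': 'https://example.com/b', 'date': '2025-01-01'},
]

results = [SearchResult(title=r['title'], link=r['link'], date=r['date']) for r in raw_rows]

# Mirror the formatting search_news builds from the raw rows
formatted = "\n".join(
    f"{idx}. {r.title}\n URL: {r.link}\n Date: {r.date}\n"
    for idx, r in enumerate(results, 1)
)
print(formatted)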
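The text-extraction path can be sanity-checked offline as well; a sketch against a made-up HTML string, exercising the same cleanup steps (decompose non-content tags, join paragraph text, collapse whitespace) whose result extract_text_from_url now returns directly:

import re
from bs4 import BeautifulSoup

html = """
<html><head><style>p { color: red }</style></head>
<body>
  <nav>menu</nav>
  <p>First   paragraph.</p>
  <script>var x = 1;</script>
  <p>Second
  paragraph.</p>
  <footer>footer</footer>
</body></html>
"""

soup = BeautifulSoup(html, 'html.parser')

# Same cleanup as extract_text_from_url: drop non-content elements
for element in soup(['script', 'style', 'nav', 'header', 'footer']):
    element.decompose()

# Join paragraph text, then collapse whitespace runs
paragraphs = soup.find_all('p')
text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
print(re.sub(r'\s+', ' ', text))  # -> "First paragraph. Second paragraph."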