Spaces:
Running
Running
from smolagents import Tool | |
from typing import Any, Optional | |
class SimpleTool(Tool):
    """Tool that downloads a web page and reports a summary, sentiment and size.

    The whole pipeline lives in `forward`: fetch the URL, strip the HTML down
    to visible text, summarize it, classify its sentiment, and return
    everything as one JSON string.
    """

    name = "web_content_analyzer"
    description = "Analyzes web content using AI models."
    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."}}
    output_type = "string"

    def forward(self, url: str) -> str:
        """Analyzes web content using AI models.

        Args:
            url: The webpage URL to analyze.

        Returns:
            str: Analysis results in JSON format. On success the object has
                the keys "title", "summary", "sentiment" and "stats" (with
                "words" and "chars"). On any failure, including an HTTP
                error status or a page with fewer than 100 characters of
                text, the object has a single "error" key instead. This
                method does not raise; every exception is reported that way.
        """
        # Imports are kept local to the method, as in the original tool.
        import requests
        from bs4 import BeautifulSoup
        import re
        from transformers import pipeline
        import json

        try:
            # Fetch content
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
            response = requests.get(url, headers=headers, timeout=10)
            # Without this, a 4xx/5xx error page would be summarized and
            # scored as if it were the requested content.
            response.raise_for_status()

            # Parse HTML and drop elements that carry no visible text
            soup = BeautifulSoup(response.text, 'html.parser')
            for tag in soup(['script', 'style', 'meta']):
                tag.decompose()

            # Extract basic info.
            # `.string` is None for an empty <title> or one with several
            # child nodes, so fall back to the placeholder in that case too.
            title_tag = soup.title
            title = (title_tag.string if title_tag else None) or "No title found"
            text = re.sub(r'\s+', ' ', soup.get_text()).strip()

            if len(text) < 100:
                return json.dumps({
                    "error": "Not enough content to analyze"
                })

            # Get summary.
            # The character slice keeps the input small but does not bound the
            # token count (one character can map to several tokens), so the
            # pipeline is also asked to truncate to the model's limit.
            summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            summary = summarizer(
                text[:1024], max_length=100, min_length=30, truncation=True
            )[0]['summary_text']

            # Get sentiment (same reasoning for truncation as above)
            classifier = pipeline("text-classification",
                                  model="nlptown/bert-base-multilingual-uncased-sentiment")
            sentiment = classifier(text[:512], truncation=True)[0]
            # The first character of the label is read as a 1-5 rating and
            # mapped onto the five mood names below.
            score = int(sentiment['label'][0])
            mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]

            # Format results
            result = {
                "title": title,
                "summary": summary,
                "sentiment": f"{mood} ({score}/5)",
                "stats": {
                    "words": len(text.split()),
                    "chars": len(text)
                }
            }
            return json.dumps(result)
        except Exception as e:
            # Tool boundary: report any failure to the caller as JSON
            # instead of letting the exception propagate.
            return json.dumps({
                "error": str(e)
            })