import os
from datetime import datetime, timedelta
from urllib.parse import quote

import gradio as gr
import pandas as pd
import plotly.express as px
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
# Spreadsheet used to map company names to ticker symbols. It is read once,
# at import time; get_stock_symbol() looks up its 'Name' and 'Symbol' columns.
file_path = '/home/user/app/Top 2000 Valued Companies with Ticker Symbols.xlsx'
companies_df = pd.read_excel(file_path)
def get_stock_symbol(company_name, companies=None):
    """Return the ticker symbol of the first company whose name contains *company_name*.

    The search is a case-insensitive, *literal* substring match against the
    ``Name`` column, so user input containing regex metacharacters (for
    example ``"Alphabet ("`` or ``"Yahoo!"``) is matched verbatim instead of
    being compiled as a pattern.

    Args:
        company_name: Free-text company name (or fragment) to look up.
        companies: Optional DataFrame with ``Name`` and ``Symbol`` columns.
            Defaults to the module-level ``companies_df``.

    Returns:
        The ``Symbol`` value of the first matching row, or ``None`` when the
        query is not a string, is blank, or matches no row.
    """
    if not isinstance(company_name, str):
        return None

    needle = company_name.strip()
    if not needle:
        # An empty string is a substring of every name; without this guard a
        # blank query would silently resolve to the first row of the table.
        return None

    table = companies_df if companies is None else companies
    # regex=False: treat the query as plain text, never as a regular expression.
    hits = table[table['Name'].str.contains(needle, case=False, na=False, regex=False)]
    if hits.empty:
        return None
    return hits.iloc[0]['Symbol']
# Hugging Face pipeline built once at import time and reused by
# analyze_sentiment() for every headline. Only the model id is given here;
# no task argument is passed to pipeline().
sentiment_model = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
def encode_special_characters(text):
    """Lower-case *text* and percent-encode it for use as a URL query value.

    Every character outside the RFC 3986 unreserved set (letters, digits,
    ``-``, ``.``, ``_``, ``~``) is escaped. That covers ``&``, ``=``, ``+`` and
    space as before, and additionally ``#``, ``?``, ``%``, ``/`` and non-ASCII
    characters, which would otherwise truncate or corrupt the query string.

    Args:
        text: The raw search text.

    Returns:
        The lower-cased, percent-encoded string.
    """
    # safe='' so that '/' is escaped too; the result is a single query value,
    # not a path.
    return quote(text.lower(), safe='')
def fetch_news(query, num_articles=10):
    """Scrape Google News search results for *query* into a DataFrame.

    Args:
        query: Free-text search phrase; it is URL-encoded before use.
        num_articles: Maximum number of articles to return.

    Returns:
        A DataFrame with the columns ``Title``, ``Source``, ``Time``,
        ``Author`` and ``Link`` (one row per article). An empty DataFrame is
        returned when the request fails or no usable article is found.
    """
    encoded_query = encode_special_characters(query)
    url = f"https://news.google.com/search?q={encoded_query}&hl=en-US&gl=in&ceid=US%3Aen&num={num_articles}"

    try:
        # A timeout keeps the UI from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching news: {e}")
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.find_all('article')

    news_data = []
    for article in articles[:num_articles]:
        anchor = article.find('a')
        if anchor is None or not anchor.get('href'):
            # No link to point at; skip rather than crash on a None subscript.
            continue
        link = anchor['href'].replace("./articles/", "https://news.google.com/articles/")

        # The fields are picked by position in the article's text nodes.
        # NOTE(review): these offsets depend on Google News' current markup.
        text_parts = article.get_text(separator='\n').split('\n')
        news_data.append({
            'Title': text_parts[2] if len(text_parts) > 2 else 'Missing',
            'Source': text_parts[0] if len(text_parts) > 0 else 'Missing',
            'Time': text_parts[3] if len(text_parts) > 3 else 'Missing',
            'Author': text_parts[4].split('By ')[-1] if len(text_parts) > 4 else 'Missing',
            'Link': link,
        })

    return pd.DataFrame(news_data)
def analyze_sentiment(text):
    """Run the module-level sentiment pipeline on *text*.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction the
        pipeline returns.
    """
    predictions = sentiment_model(text)
    best = predictions[0]
    label = best['label']
    score = best['score']
    return label, score
def fetch_stock_data(symbol):
    """Fetch daily price history for *symbol* from Alpha Vantage via RapidAPI.

    Args:
        symbol: Ticker symbol to query.

    Returns:
        A DataFrame indexed by date (ascending) with numeric ``Open``,
        ``High``, ``Low``, ``Close`` and ``Volume`` columns. An empty
        DataFrame is returned when the request fails, the body is not valid
        JSON, or the response carries no ``"Time Series (Daily)"`` entry.
    """
    url = "https://alpha-vantage.p.rapidapi.com/query"
    querystring = {"function": "TIME_SERIES_DAILY", "symbol": symbol, "outputsize": "compact", "datatype": "json"}
    headers = {
        # NOTE(security): prefer supplying the key through the RAPIDAPI_KEY
        # environment variable. The literal fallback is kept only so existing
        # deployments keep working; a key committed to source should be
        # rotated and then removed from this file.
        "x-rapidapi-key": os.environ.get(
            "RAPIDAPI_KEY", "e078dae417mshb13ddc2d8149768p1608e9jsn888ce49e8554"
        ),
        "x-rapidapi-host": "alpha-vantage.p.rapidapi.com",
    }

    try:
        response = requests.get(url, headers=headers, params=querystring, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older requests versions.
        print(f"Error fetching stock data: {e}")
        return pd.DataFrame()

    series = data.get("Time Series (Daily)") if isinstance(data, dict) else None
    if not series:
        return pd.DataFrame()

    stock_data = pd.DataFrame(series).T
    stock_data.index = pd.to_datetime(stock_data.index)
    stock_data.columns = ["Open", "High", "Low", "Close", "Volume"]
    # The JSON values arrive as strings; convert them so charts get a numeric
    # axis, and order the rows chronologically.
    stock_data = stock_data.apply(pd.to_numeric, errors='coerce').sort_index()
    return stock_data
def news_and_analysis(query):
    """Fetch news for *query*, score headline sentiment and build the charts.

    Args:
        query: Topic or company name entered by the user.

    Returns:
        A 3-tuple ``(news, sentiment_fig, stock_fig)``:

        * ``news`` - the news DataFrame with added ``Sentiment`` and
          ``Sentiment_Score`` columns, or the string
          ``"No news articles found."`` when nothing was fetched.
        * ``sentiment_fig`` - a Plotly bar chart of the sentiment scores, or
          ``None`` when there is no news.
        * ``stock_fig`` - a Plotly line chart of closing prices, or ``None``
          when no ticker symbol or price data could be resolved.
    """
    news_df = fetch_news(query)
    if news_df.empty:
        # NOTE(review): the first output is wired to a gr.DataFrame; confirm
        # that component accepts a plain message string.
        return "No news articles found.", None, None

    news_df['Sentiment'], news_df['Sentiment_Score'] = zip(*news_df['Title'].apply(analyze_sentiment))

    sentiment_fig = px.bar(
        news_df,
        x='Time',
        y='Sentiment_Score',
        color='Sentiment',
        color_discrete_map={
            # Labels as documented on the model card of the sentiment model
            # configured in this file.
            'POS': 'green', 'NEU': 'gray', 'NEG': 'red',
            # Long-form labels kept for backward compatibility.
            'positive': 'green', 'neutral': 'gray', 'negative': 'red',
        },
        title='News Sentiment Over Time',
        labels={'Time': 'Publication Time', 'Sentiment_Score': 'Sentiment Score'},
    )

    stock_fig = None
    stock_symbol = get_stock_symbol(query)
    if stock_symbol:
        stock_data = fetch_stock_data(stock_symbol)
        if not stock_data.empty:
            stock_fig = px.line(stock_data, x=stock_data.index, y='Close', title=f'{stock_symbol} Stock Price')

    return news_df, sentiment_fig, stock_fig
# Gradio UI: a text box and button on the left, the news table and the two
# charts on the right. Clicking the button runs news_and_analysis().
with gr.Blocks() as demo:
    # Heading and short usage instructions shown at the top of the page.
    gr.Markdown(
        "# Financial News Sentiment Analysis\n"
        "Analyze the sentiment of news articles related to financial topics or companies.\n"
        "Enter a topic or company name to get started."
    )

    with gr.Row():
        with gr.Column():
            topic = gr.Textbox(label="Enter a financial topic or company name", placeholder="e.g., Apple Inc.")
            analyze_btn = gr.Button(value="Analyze")
        with gr.Column():
            news_output = gr.DataFrame(label="News and Sentiment Analysis")
            sentiment_plot = gr.Plot(label="Sentiment Analysis")
            stock_plot = gr.Plot(label="Stock Price Movement")

    # The three outputs line up with the 3-tuple returned by news_and_analysis.
    analyze_btn.click(
        fn=news_and_analysis,
        inputs=topic,
        outputs=[news_output, sentiment_plot, stock_plot],
    )
if __name__ == "__main__":