"""Streamlit app: multilingual sentiment analysis over an uploaded CSV/Excel file."""

import os
import time  # noqa: F401  (kept: was imported by the original file)

# The Hugging Face libraries resolve their cache location when they are first
# imported, so HF_HOME must be exported *before* `transformers` is imported.
os.environ["HF_HOME"] = "./hf_cache"
# Ensure the cache directory exists and is writable.
os.makedirs("./hf_cache", exist_ok=True)

import matplotlib.pyplot as plt  # noqa: E402
import pandas as pd  # noqa: E402
import streamlit as st  # noqa: E402
import torch  # noqa: E402
from transformers import (  # noqa: E402
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

model_name = "tabularisai/multilingual-sentiment-analysis"


@st.cache_resource
def _load_model(name):
    """Load the tokenizer and classifier once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    avoids re-reading the model weights on each rerun.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_model(model_name)


def predict_sentiment(texts, batch_size=16):
    """Classify each text and return its sentiment label.

    A Streamlit progress bar is created and advanced after every batch.

    Args:
        texts: Iterable of strings to classify.
        batch_size: Number of texts sent through the model per forward pass.
            ``1`` reproduces the original one-text-at-a-time behaviour.

    Returns:
        A list of labels ("Very Negative" ... "Very Positive"), one per input
        text, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    sentiment_map = {
        0: "Very Negative",
        1: "Negative",
        2: "Neutral",
        3: "Positive",
        4: "Very Positive",
    }

    texts = list(texts)
    total_texts = len(texts)
    progress_bar = st.progress(0)
    sentiments = []

    for start in range(0, total_texts, batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(
            batch,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        with torch.no_grad():
            logits = model(**inputs).logits
        # argmax over logits equals argmax over softmax(logits); the softmax
        # is monotonic, so it is not needed to pick the winning class.
        class_ids = torch.argmax(logits, dim=-1).tolist()
        sentiments.extend(sentiment_map[class_id] for class_id in class_ids)
        progress_bar.progress((start + len(batch)) / total_texts)

    return sentiments


def _read_table(uploaded):
    """Parse the uploaded file as CSV if its name ends in .csv, else as Excel."""
    if uploaded.name.lower().endswith(".csv"):
        return pd.read_csv(uploaded)
    return pd.read_excel(uploaded)


# Streamlit UI
st.title("Sentiment Analysis App")
st.write("Upload a CSV or Excel file containing text data for sentiment analysis.")

# File upload
uploaded_file = st.file_uploader(
    "Upload a CSV or Excel file",
    type=["csv", "xlsx"],
    accept_multiple_files=False,
)

if uploaded_file is not None:
    try:
        df = _read_table(uploaded_file)
    except Exception as e:  # UI boundary: report any parser failure to the user
        st.error(f"Error reading file: {e}")
    else:
        # Everything below needs a successfully parsed frame, so it lives in
        # the `else` branch; a failed read can no longer reach an undefined df.
        st.write("Dataset Preview:")
        st.dataframe(df.head())

        # Select text column
        text_column = st.selectbox("Select the text column for analysis", df.columns)

        if st.button("Analyze Sentiment"):
            # Get text data
            texts = df[text_column].astype(str).tolist()

            if not texts:
                st.warning("The selected column contains no rows to analyze.")
            else:
                # Predict sentiments with progress bar
                df["Sentiment"] = predict_sentiment(texts)

                # Display results
                st.write("Sentiment Analysis Results:")
                st.dataframe(df[[text_column, "Sentiment"]])

                # Pie chart of sentiment distribution
                st.write("Sentiment Distribution:")
                sentiment_counts = df["Sentiment"].value_counts()
                fig, ax = plt.subplots()
                ax.pie(
                    sentiment_counts,
                    labels=sentiment_counts.index,
                    autopct="%1.1f%%",
                    startangle=90,
                )
                ax.axis("equal")
                st.pyplot(fig)
                # Release the figure; pyplot otherwise keeps it alive across reruns.
                plt.close(fig)