shresht8 committed on
Commit
50fe41a
·
verified ·
1 Parent(s): bfa43e3

update app - extract likes/dislikes

Browse files
Files changed (3) hide show
  1. app.py +318 -343
  2. prompts.py +34 -0
  3. requirements.txt +2 -0
app.py CHANGED
@@ -1,344 +1,319 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import torch
4
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
- import plotly.express as px
6
- import plotly.graph_objects as go
7
- from collections import defaultdict
8
-
9
- # Load model and tokenizer globally for efficiency
10
- model_name = "tabularisai/multilingual-sentiment-analysis"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
13
-
14
- # Define sentiment weights for score calculation
15
- SENTIMENT_WEIGHTS = {
16
- 0: 0.0, # Very Negative
17
- 1: 0.25, # Negative
18
- 2: 0.5, # Neutral
19
- 3: 0.75, # Positive
20
- 4: 1.0 # Very Positive
21
- }
22
-
23
-
24
- def predict_sentiment_with_scores(texts):
25
- """
26
- Predict sentiment for a list of texts and return both class labels and sentiment scores
27
- """
28
- inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
29
- with torch.no_grad():
30
- outputs = model(**inputs)
31
-
32
- probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
33
-
34
- # Get predicted classes
35
- sentiment_map = {
36
- 0: "Very Negative",
37
- 1: "Negative",
38
- 2: "Neutral",
39
- 3: "Positive",
40
- 4: "Very Positive"
41
- }
42
- predicted_classes = [sentiment_map[p] for p in torch.argmax(probabilities, dim=-1).tolist()]
43
-
44
- # Calculate sentiment scores (0-100)
45
- sentiment_scores = []
46
- for prob in probabilities:
47
- # Weighted sum of probabilities
48
- score = sum(prob[i].item() * SENTIMENT_WEIGHTS[i] for i in range(len(prob)))
49
- # Scale to 0-100
50
- sentiment_scores.append(round(score * 100, 2))
51
-
52
- return predicted_classes, sentiment_scores
53
-
54
-
55
- def process_single_sheet(df, product_name):
56
- """
57
- Process a single dataframe and return sentiment analysis results
58
- """
59
- if 'Reviews' not in df.columns:
60
- raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")
61
-
62
- reviews = df['Reviews'].fillna("")
63
- sentiments, scores = predict_sentiment_with_scores(reviews.tolist())
64
-
65
- df['Sentiment'] = sentiments
66
- df['Sentiment_Score'] = scores
67
-
68
- # Calculate sentiment distribution
69
- sentiment_counts = pd.Series(sentiments).value_counts()
70
- avg_sentiment_score = round(sum(scores) / len(scores), 2)
71
-
72
- return df, sentiment_counts, avg_sentiment_score
73
-
74
-
75
- def create_comparison_charts(sentiment_results, avg_scores):
76
- """
77
- Create investment-focused comparison charts including the new sentiment score visualization
78
- """
79
- # Prepare data for plotting
80
- plot_data = []
81
- for product, sentiment_counts in sentiment_results.items():
82
- sentiment_dict = sentiment_counts.to_dict()
83
- total = sum(sentiment_dict.values())
84
-
85
- row = {
86
- 'Product': product,
87
- 'Total Reviews': total
88
- }
89
- # Calculate percentages for each sentiment
90
- for sentiment, count in sentiment_dict.items():
91
- row[sentiment] = (count / total) * 100
92
- plot_data.append(row)
93
-
94
- df = pd.DataFrame(plot_data)
95
-
96
- # Ensure all sentiment columns exist in the correct order
97
- sentiments = ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']
98
- for sentiment in sentiments:
99
- if sentiment not in df.columns:
100
- df[sentiment] = 0
101
-
102
- # Calculate weighted sentiment score (0 to 100)
103
- sentiment_weights = {
104
- 'Very Negative': 0,
105
- 'Negative': 25,
106
- 'Neutral': 50,
107
- 'Positive': 75,
108
- 'Very Positive': 100
109
- }
110
-
111
- # Create stacked bar chart for sentiment distribution
112
- distribution_fig = go.Figure()
113
- sentiments = ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']
114
- colors = ['rgb(39, 174, 96)', 'rgb(46, 204, 113)',
115
- 'rgb(241, 196, 15)', 'rgb(231, 76, 60)',
116
- 'rgb(192, 57, 43)']
117
-
118
- for sentiment, color in zip(sentiments, colors):
119
- distribution_fig.add_trace(go.Bar(
120
- name=sentiment,
121
- x=df['Product'],
122
- y=df[sentiment],
123
- marker_color=color
124
- ))
125
-
126
- distribution_fig.update_layout(
127
- barmode='stack',
128
- title='Sentiment Distribution by Product',
129
- yaxis_title='Percentage (%)',
130
- showlegend=True
131
- )
132
-
133
- # Calculate Positive-Negative Ratios
134
- df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
135
- df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)
136
-
137
- # Create Positive-Negative ratio chart
138
- ratio_fig = go.Figure()
139
- ratio_fig.add_trace(go.Bar(
140
- name='Positive',
141
- x=df['Product'],
142
- y=df['Positive Ratio'],
143
- marker_color='rgb(50, 205, 50)'
144
- ))
145
- ratio_fig.add_trace(go.Bar(
146
- name='Negative',
147
- x=df['Product'],
148
- y=df['Negative Ratio'],
149
- marker_color='rgb(220, 20, 60)'
150
- ))
151
- ratio_fig.update_layout(
152
- barmode='group',
153
- title='Positive vs Negative Sentiment Ratio by Product',
154
- yaxis_title='Percentage (%)'
155
- )
156
-
157
- # Create summary DataFrame
158
- summary_data = {
159
- 'Product': df['Product'].tolist(),
160
- 'Total Reviews': df['Total Reviews'].tolist(),
161
- 'Positive Ratio (%)': df['Positive Ratio'].round(2).tolist(),
162
- 'Negative Ratio (%)': df['Negative Ratio'].round(2).tolist(),
163
- 'Neutral Ratio (%)': df['Neutral'].round(2).tolist(),
164
- 'Weighted Sentiment Score': [avg_scores[prod] for prod in df['Product']]
165
- }
166
- summary_df = pd.DataFrame(summary_data)
167
-
168
- # Create sentiment score chart
169
- score_comparison_fig = go.Figure()
170
- score_comparison_fig.add_trace(go.Bar(
171
- x=summary_df['Product'],
172
- y=summary_df['Weighted Sentiment Score'],
173
- text=[f"{score:.1f}" for score in summary_df['Weighted Sentiment Score']],
174
- textposition='auto',
175
- marker_color='rgb(65, 105, 225)',
176
- name='Sentiment Score'
177
- ))
178
- score_comparison_fig.update_layout(
179
- title='Weighted Sentiment Scores by Product (0-100)',
180
- yaxis_title='Sentiment Score',
181
- yaxis_range=[0, 100],
182
- showlegend=False,
183
- bargap=0.3,
184
- plot_bgcolor='white'
185
- )
186
-
187
- return score_comparison_fig, distribution_fig, ratio_fig, summary_df
188
-
189
- products = list(avg_scores.keys())
190
- scores = list(avg_scores.values())
191
-
192
- # Add bars for sentiment scores
193
- score_comparison_fig.add_trace(go.Bar(
194
- x=products,
195
- y=scores,
196
- text=[f"{score:.1f}" for score in scores],
197
- textposition='auto',
198
- marker_color='rgb(65, 105, 225)',
199
- name='Sentiment Score'
200
- ))
201
-
202
- # Update layout with appropriate styling
203
- score_comparison_fig.update_layout(
204
- title='Weighted Sentiment Scores by Product (0-100)',
205
- yaxis_title='Sentiment Score',
206
- yaxis_range=[0, 100],
207
- showlegend=False,
208
- bargap=0.3,
209
- plot_bgcolor='white'
210
- )
211
-
212
- # Add score to summary DataFrame
213
- summary_df['Weighted Sentiment Score'] = [avg_scores[prod] for prod in summary_df['Product']]
214
-
215
- # Create sentiment distribution stacked bar chart
216
- distribution_fig = go.Figure()
217
- colors = ['rgb(39, 174, 96)', 'rgb(46, 204, 113)',
218
- 'rgb(241, 196, 15)', 'rgb(231, 76, 60)',
219
- 'rgb(192, 57, 43)']
220
-
221
- # Add traces for each sentiment in order
222
- for sentiment, color in zip(sentiments, colors):
223
- distribution_fig.add_trace(go.Bar(
224
- name=sentiment,
225
- x=df['Product'],
226
- y=df[sentiment],
227
- marker_color=color
228
- ))
229
-
230
- distribution_fig.update_layout(
231
- barmode='stack',
232
- title='Sentiment Distribution by Product',
233
- yaxis_title='Percentage (%)',
234
- showlegend=True
235
- )
236
-
237
- return score_comparison_fig, distribution_fig, summary_df, output_path
238
-
239
-
240
- def process_file(file_obj):
241
- """
242
- Process the input file and add sentiment analysis results
243
- """
244
- try:
245
- file_path = file_obj.name
246
- sentiment_results = defaultdict(pd.Series)
247
- avg_sentiment_scores = {}
248
- all_processed_dfs = {}
249
-
250
- if file_path.endswith('.csv'):
251
- df = pd.read_csv(file_path)
252
- product_name = "Product" # Default name for CSV
253
- processed_df, sentiment_counts, avg_score = process_single_sheet(df, product_name)
254
- all_processed_dfs[product_name] = processed_df
255
- sentiment_results[product_name] = sentiment_counts
256
- avg_sentiment_scores[product_name] = avg_score
257
-
258
- elif file_path.endswith(('.xlsx', '.xls')):
259
- excel_file = pd.ExcelFile(file_path)
260
- for sheet_name in excel_file.sheet_names:
261
- df = pd.read_excel(file_path, sheet_name=sheet_name)
262
- processed_df, sentiment_counts, avg_score = process_single_sheet(df, sheet_name)
263
- all_processed_dfs[sheet_name] = processed_df
264
- sentiment_results[sheet_name] = sentiment_counts
265
- avg_sentiment_scores[sheet_name] = avg_score
266
- else:
267
- raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
268
-
269
- # Create visualizations with new sentiment score chart
270
- score_comparison_fig, distribution_fig, ratio_fig, summary_df = create_comparison_charts(
271
- sentiment_results, avg_sentiment_scores
272
- )
273
-
274
- # Save results
275
- output_path = "sentiment_analysis_results.xlsx"
276
- with pd.ExcelWriter(output_path) as writer:
277
- for sheet_name, df in all_processed_dfs.items():
278
- df.to_excel(writer, sheet_name=sheet_name, index=False)
279
- if isinstance(summary_df, pd.DataFrame): # Safety check
280
- summary_df.to_excel(writer, sheet_name='Summary', index=False)
281
-
282
- # Save results
283
- output_path = "sentiment_analysis_results.xlsx"
284
- with pd.ExcelWriter(output_path) as writer:
285
- # Save individual sheet data
286
- for sheet_name, df in all_processed_dfs.items():
287
- df.to_excel(writer, sheet_name=sheet_name, index=False)
288
-
289
- # Save summary data
290
- if isinstance(summary_df, pd.DataFrame): # Ensure it's a DataFrame before saving
291
- summary_df.to_excel(writer, sheet_name='Summary', index=False)
292
-
293
- return score_comparison_fig, distribution_fig, summary_df, output_path
294
-
295
- except Exception as e:
296
- raise gr.Error(str(e))
297
-
298
-
299
- # Update the Gradio interface
300
- with gr.Blocks() as interface:
301
- gr.Markdown("# Product Review Sentiment Analysis")
302
-
303
- gr.Markdown("""
304
- ### Quick Guide
305
- 1. **Excel File (Multiple Products)**:
306
- - Create separate sheets for each product
307
- - Name sheets with product/company names
308
- - Include "Reviews" column in each sheet
309
-
310
- 2. **CSV File (Single Product)**:
311
- - Include "Reviews" column
312
-
313
- Upload your file and click Analyze to get started.
314
- """)
315
-
316
- with gr.Row():
317
- file_input = gr.File(
318
- label="Upload File (CSV or Excel)",
319
- file_types=[".csv", ".xlsx", ".xls"]
320
- )
321
-
322
- with gr.Row():
323
- analyze_btn = gr.Button("Analyze Sentiments")
324
-
325
- with gr.Row():
326
- sentiment_score_plot = gr.Plot(label="Weighted Sentiment Scores")
327
-
328
- with gr.Row():
329
- distribution_plot = gr.Plot(label="Sentiment Distribution")
330
-
331
- with gr.Row():
332
- summary_table = gr.Dataframe(label="Summary Metrics")
333
-
334
- with gr.Row():
335
- output_file = gr.File(label="Download Full Report")
336
-
337
- analyze_btn.click(
338
- fn=process_file,
339
- inputs=[file_input],
340
- outputs=[sentiment_score_plot, distribution_plot, summary_table, output_file]
341
- )
342
-
343
- # Launch interface
344
  interface.launch()
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from collections import defaultdict
8
+ from openai import OpenAI
9
+ from pydantic import BaseModel, Field, field_validator, ValidationInfo
10
+ from typing import Optional, Dict, Any, List, Annotated
11
+ from instructor import patch
12
+ import instructor
13
+ from prompts import sentiments_prompt
14
+
15
# The classifier checkpoint is loaded once at import time so that every
# request reuses the same tokenizer and model objects.
model_name = "tabularisai/multilingual-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Weight applied to each class probability when a 0-1 sentiment score is
# computed (class index -> weight).
SENTIMENT_WEIGHTS = {
    0: 0.0,   # Very Negative
    1: 0.25,  # Negative
    2: 0.5,   # Neutral
    3: 0.75,  # Positive
    4: 1.0    # Very Positive
}
28
+
29
class ExtractProductSentiment(BaseModel):
    """Extracts what people like and dislike about a product based on product reviews and sentiment scores (0-100)"""
    # Both lists hold between one and three aggregated statements; the
    # validator below enforces those bounds.
    product_likes: List[str] = Field(..., description="What people like about the product. List of 3 sentences AT MOST. Must be aggregated in the order of importance.")
    product_dislikes: List[str] = Field(..., description="What people dislike about the product. List of 3 sentences AT MOST. Must be aggregated in the order of importance.")

    @field_validator("product_likes", "product_dislikes")
    @classmethod
    def validate_product_likes_and_dislikes(cls, v: List[str], info: ValidationInfo) -> List[str]:
        """
        Reject empty lists and lists with more than three entries.

        The error text is written as an instruction: the model is requested
        through a structured-output client with retries, so the message is
        worded to tell the LLM how to correct its answer.

        Raises:
            ValueError: if the list is empty or holds more than 3 items.
        """
        if not v:
            raise ValueError(f"At least one {info.field_name} must be provided. If nothing to say, please enter 'None'")

        if len(v) > 3:
            raise ValueError(
                f"{info.field_name} contains {len(v)} points. Please aggregate the points to a maximum of 3 key points "
                "in order of importance. Combine similar points together."
            )
        return v
45
+
46
def predict_sentiment_with_scores(texts, batch_size=32):
    """
    Predict sentiment for a list of texts and return both class labels and sentiment scores.

    Args:
        texts: iterable of review texts. Items are coerced to ``str``
            (``None`` becomes an empty string) before tokenization.
        batch_size: number of texts sent through the model per forward pass.
            Batching keeps memory bounded when a sheet holds many reviews.

    Returns:
        A ``(labels, scores)`` tuple of two lists with one entry per input
        text: the label of the most probable class, and the
        probability-weighted sum of ``SENTIMENT_WEIGHTS`` scaled to 0-100 and
        rounded to two decimals. Both lists are empty for empty input.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    texts = ["" if text is None else str(text) for text in texts]
    if not texts:
        # Nothing to classify; avoid calling the tokenizer with an empty batch.
        return [], []

    # Class index -> label.
    # NOTE(review): assumes the checkpoint orders its classes this way - confirm against the model card.
    sentiment_map = {
        0: "Very Negative",
        1: "Negative",
        2: "Neutral",
        3: "Positive",
        4: "Very Positive"
    }

    predicted_classes = []
    sentiment_scores = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)

        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_classes.extend(
            sentiment_map[index] for index in torch.argmax(probabilities, dim=-1).tolist()
        )

        # One tolist() per batch instead of one .item() per probability.
        for row in probabilities.tolist():
            score = sum(prob * SENTIMENT_WEIGHTS[index] for index, prob in enumerate(row))
            # Scale to 0-100
            sentiment_scores.append(round(score * 100, 2))

    return predicted_classes, sentiment_scores
75
+
76
+ #patch() # Patch OpenAI client to support response models
77
+
78
def get_product_sentiment(client, reviews: List[str], scores: List[float], model: str = "gpt-4o") -> ExtractProductSentiment:
    """
    Extract product likes and dislikes using OpenAI.

    Args:
        client: chat client that accepts ``response_model`` and
            ``max_retries``. These are not plain OpenAI SDK arguments;
            process_file passes an instructor-patched client.
        reviews: review texts, aligned one-to-one with ``scores``.
        scores: sentiment score (0-100) of each review.
        model: name of the chat model to query.

    Returns:
        The ``ExtractProductSentiment`` instance produced by the client.

    Raises:
        ValueError: if every review is blank, so there is nothing to summarise.
    """
    # Pair each review with its score for context. Blank reviews carry no
    # information, so they are left out of the prompt.
    review_lines = [
        f"Review (Score: {score}): {review}"
        for review, score in zip(reviews, scores)
        if str(review).strip()
    ]
    if not review_lines:
        raise ValueError("No non-empty reviews available to extract likes/dislikes from")

    prompt = sentiments_prompt.format(review_context="\n".join(review_lines))

    return client.chat.completions.create(
        model=model,
        response_model=ExtractProductSentiment,
        messages=[
            {"role": "system", "content": "You are a helpful product analyst."},
            {"role": "user", "content": prompt}
        ],
        max_retries=3
    )
96
+
97
def create_comparison_charts(sentiment_results, avg_sentiment_scores):
    """
    Create comparison charts for sentiment analysis across products.

    Args:
        sentiment_results: mapping of product name -> pandas Series of
            review counts indexed by sentiment label.
        avg_sentiment_scores: mapping of product name -> average sentiment score.

    Returns:
        ``(score_comparison_fig, distribution_fig, ratio_fig, summary_df)``:
        a bar chart of average scores, a grouped bar chart of
        positive/neutral/negative counts, a stacked bar chart of the same
        three groups as percentages of each product's reviews, and the
        per-product summary table.
    """
    summary_columns = [
        'Product', 'Average Sentiment Score', 'Total Reviews',
        'Very Positive %', 'Positive %', 'Neutral %', 'Negative %', 'Very Negative %'
    ]
    distribution_columns = ['Product', 'Sentiment', 'Count', 'Percentage']
    # Shared by both distribution charts so a sentiment keeps the same color.
    sentiment_colors = {
        'Positive': '#2ECC71',  # Green
        'Neutral': '#F1C40F',   # Yellow
        'Negative': '#E74C3C'   # Red
    }

    def percentage(count, total):
        # A product without reviews yields 0% rather than a division error.
        return round((count / total) * 100, 2) if total else 0.0

    summary_data = []
    distribution_data = []
    for product, counts in sentiment_results.items():
        total = counts.sum()
        summary_data.append({
            'Product': product,
            'Average Sentiment Score': avg_sentiment_scores[product],
            'Total Reviews': total,
            'Very Positive %': percentage(counts.get('Very Positive', 0), total),
            'Positive %': percentage(counts.get('Positive', 0), total),
            'Neutral %': percentage(counts.get('Neutral', 0), total),
            'Negative %': percentage(counts.get('Negative', 0), total),
            'Very Negative %': percentage(counts.get('Very Negative', 0), total)
        })

        # Aggregate positive and negative sentiments
        aggregated_counts = {
            'Positive': counts.get('Very Positive', 0) + counts.get('Positive', 0),
            'Neutral': counts.get('Neutral', 0),
            'Negative': counts.get('Very Negative', 0) + counts.get('Negative', 0)
        }
        for sentiment, count in aggregated_counts.items():
            distribution_data.append({
                'Product': product,
                'Sentiment': sentiment,
                'Count': count,
                'Percentage': percentage(count, total)
            })

    # Explicit columns keep the frames plottable even when there are no products.
    summary_df = pd.DataFrame(summary_data, columns=summary_columns)
    distribution_df = pd.DataFrame(distribution_data, columns=distribution_columns)

    # Score comparison chart
    score_comparison_fig = px.bar(
        summary_df,
        x='Product',
        y='Average Sentiment Score',
        title='Average Sentiment Scores by Product',
        labels={'Average Sentiment Score': 'Score (0-100)'}
    )

    # Distribution chart (absolute counts, one bar per sentiment group)
    distribution_fig = px.bar(
        distribution_df,
        x='Product',
        y='Count',
        color='Sentiment',
        title='Sentiment Distribution by Product',
        barmode='group',
        color_discrete_map=sentiment_colors
    )

    # Ratio chart (percentage stacked bar): plots each group's share of the
    # product's reviews so products with different review counts compare fairly.
    ratio_fig = px.bar(
        distribution_df,
        x='Product',
        y='Percentage',
        color='Sentiment',
        title='Sentiment Distribution Ratio by Product',
        barmode='relative',
        labels={'Percentage': 'Share of reviews (%)'},
        color_discrete_map=sentiment_colors
    )

    return score_comparison_fig, distribution_fig, ratio_fig, summary_df
172
+
173
def process_single_sheet(df, product_name, openai_client):
    """
    Process a single dataframe and return sentiment analysis results.

    Args:
        df: dataframe holding one product's reviews in a 'Reviews' column.
        product_name: name used in error/log messages for this sheet/file.
        openai_client: client forwarded to ``get_product_sentiment``.

    Returns:
        ``(df, sentiment_counts, avg_sentiment_score)``: a re-indexed copy of
        the frame with 'Sentiment', 'Sentiment_Score', 'Likes' and 'Dislikes'
        columns added, the count of reviews per sentiment label, and the mean
        sentiment score rounded to two decimals.

    Raises:
        ValueError: if the 'Reviews' column is missing or the sheet has no rows.
    """
    from itertools import zip_longest

    if 'Reviews' not in df.columns:
        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")
    if df.empty:
        # Fail with a clear message instead of a tokenizer/division error later on.
        raise ValueError(f"No reviews found in sheet/file for {product_name}")

    # Work on a 0..n-1 indexed copy: the row writes below address rows by
    # position, and the caller's frame is left untouched.
    df = df.reset_index(drop=True)

    # Non-text cells (e.g. numbers) are converted so the tokenizer only sees strings.
    reviews = df['Reviews'].fillna("").astype(str).tolist()
    sentiments, scores = predict_sentiment_with_scores(reviews)

    df['Sentiment'] = sentiments
    df['Sentiment_Score'] = scores

    # The columns exist (blank) even when the extraction below fails.
    df['Likes'] = ""
    df['Dislikes'] = ""

    # Extract product likes and dislikes. This step is best-effort: a failure
    # is logged and the sentiment results are still returned.
    try:
        product_sentiment = get_product_sentiment(openai_client, reviews, scores)
    except Exception as e:
        print(f"Error extracting likes/dislikes for {product_name}: {str(e)}")
    else:
        # zip_longest keeps every point when the two lists differ in length
        # (plain zip would silently drop the extras of the longer list).
        points = zip_longest(
            product_sentiment.product_likes,
            product_sentiment.product_dislikes,
            fillvalue=""
        )
        # Only the first N rows are populated, N being the longer list's
        # length. If the sheet has fewer rows, .loc appends rows for the rest.
        for idx, (like, dislike) in enumerate(points):
            df.loc[idx, 'Likes'] = like
            df.loc[idx, 'Dislikes'] = dislike

    # Calculate sentiment distribution
    sentiment_counts = pd.Series(sentiments).value_counts()
    avg_sentiment_score = round(sum(scores) / len(scores), 2)

    return df, sentiment_counts, avg_sentiment_score
213
+
214
def process_file(file_obj, api_key):
    """
    Process the input file and add sentiment analysis results.

    A CSV file is treated as a single product named "Product"; an Excel
    workbook is treated as one product per sheet, named after the sheet.

    Args:
        file_obj: uploaded file, either a path string or an object whose
            ``name`` attribute is the path.
        api_key: OpenAI API key used for the likes/dislikes extraction.

    Returns:
        ``(score_comparison_fig, distribution_fig, summary_df, output_path)``
        where ``output_path`` is an Excel report with one sheet per product
        plus a 'Summary' sheet.

    Raises:
        gr.Error: wraps any failure so the message is shown in the UI.
    """
    import os
    import tempfile

    try:
        if not api_key or not api_key.strip():
            raise ValueError("OpenAI API key is required")
        if file_obj is None:
            raise ValueError("Please upload a CSV or Excel file.")

        client = instructor.patch(OpenAI(api_key=api_key.strip()))

        file_path = getattr(file_obj, "name", file_obj)
        # Compare the extension case-insensitively so "DATA.CSV" is accepted.
        extension = os.path.splitext(file_path)[1].lower()

        sentiment_results = {}
        avg_sentiment_scores = {}
        all_processed_dfs = {}

        if extension == '.csv':
            df = pd.read_csv(file_path)
            product_name = "Product"  # Default name for CSV
            processed_df, sentiment_counts, avg_score = process_single_sheet(df, product_name, client)
            all_processed_dfs[product_name] = processed_df
            sentiment_results[product_name] = sentiment_counts
            avg_sentiment_scores[product_name] = avg_score

        elif extension in ('.xlsx', '.xls'):
            # Open the workbook once, parse each sheet from it, and close it afterwards.
            with pd.ExcelFile(file_path) as excel_file:
                for sheet_name in excel_file.sheet_names:
                    df = excel_file.parse(sheet_name)
                    processed_df, sentiment_counts, avg_score = process_single_sheet(df, sheet_name, client)
                    all_processed_dfs[sheet_name] = processed_df
                    sentiment_results[sheet_name] = sentiment_counts
                    avg_sentiment_scores[sheet_name] = avg_score
        else:
            raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")

        # Create visualizations; the ratio chart is not shown by the interface.
        score_comparison_fig, distribution_fig, _ratio_fig, summary_df = create_comparison_charts(
            sentiment_results, avg_sentiment_scores
        )

        # Save results to a per-request directory so that simultaneous
        # requests do not overwrite each other's report.
        output_dir = tempfile.mkdtemp(prefix="sentiment_analysis_")
        output_path = os.path.join(output_dir, "sentiment_analysis_results.xlsx")
        with pd.ExcelWriter(output_path) as writer:
            for sheet_name, df in all_processed_dfs.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False)
            summary_df.to_excel(writer, sheet_name='Summary', index=False)

        return score_comparison_fig, distribution_fig, summary_df, output_path

    except Exception as e:
        # Surface the message in the Gradio UI while keeping the original traceback.
        raise gr.Error(str(e)) from e
265
+
266
+
267
+ # Update the Gradio interface
268
# UI layout: API key and file inputs at the top, then the analyze button,
# followed by the two charts, the summary table and the downloadable report.
with gr.Blocks() as interface:
    gr.Markdown("# Product Review Sentiment Analysis")

    gr.Markdown("""
    ### Quick Guide
    1. **Excel File (Multiple Products)**:
       - Create separate sheets for each product
       - Name sheets with product/company names
       - Include "Reviews" column in each sheet

    2. **CSV File (Single Product)**:
       - Include "Reviews" column

    Enter your OpenAI API key, upload your file and click Analyze to get started.
    """)

    with gr.Row():
        api_key_input = gr.Textbox(
            label="OpenAI API Key",
            placeholder="Enter your OpenAI API key",
            type="password"
        )

    with gr.Row():
        file_input = gr.File(
            label="Upload File (CSV or Excel)",
            file_types=[".csv", ".xlsx", ".xls"]
        )

    with gr.Row():
        analyze_btn = gr.Button("Analyze Sentiments")

    with gr.Row():
        # Label matches the chart title produced by create_comparison_charts.
        sentiment_score_plot = gr.Plot(label="Average Sentiment Scores")

    with gr.Row():
        distribution_plot = gr.Plot(label="Sentiment Distribution")

    with gr.Row():
        summary_table = gr.Dataframe(label="Summary Metrics")

    with gr.Row():
        output_file = gr.File(label="Download Full Report")

    # The output order must match the tuple returned by process_file.
    analyze_btn.click(
        fn=process_file,
        inputs=[file_input, api_key_input],
        outputs=[sentiment_score_plot, distribution_plot, summary_table, output_file]
    )

# Launch interface only when run as a script, so importing this module
# (e.g. from a test) does not start a server.
if __name__ == "__main__":
    interface.launch()
prompts.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the likes/dislikes extraction. It has exactly one
# placeholder, {review_context}, which is filled through str.format with the
# scored reviews (one review per line).
sentiments_prompt = """
Based on the following product reviews and their sentiment scores (0-100),
identify what people consistently like and dislike about the product. This can concern the product itself, the packaging, the delivery, the customer service, etc.,
but it does not have to be restricted to these. They are just examples of what people can like or dislike about the product.

Your task is to aggregate all the reviews and identify at most 3 likes and at most 3 dislikes,
ordered by what people like and dislike the most. The aspects of the product that are liked/disliked the most in the reviews
should appear first in the list, and so on.

Remember that your job is to aggregate the reviews and provide general likes and dislikes about the product.
Don't be specific to individual reviews.

For example, likes can be (but are not limited to):
- The product is easy to use.
- The product is durable.
- Customer service is helpful.
- The shipping was fast.

Dislikes can be (but are not limited to):
- The product was not secure.
- The customer service helpline was slow to respond.

These are just examples. You can report other likes and dislikes as long as they are relevant to the product.
While creating your output, remember these points:
- Reviews with higher sentiment scores are more likely to describe things customers like, and reviews with lower scores are more likely to describe things they dislike.
- If 2 or more likes/dislikes are similar, combine them into one. Make sure each of your likes/dislikes is unique and not similar to the others.
- Make sure your output is in order of importance.
- Don't make up likes/dislikes. Your likes/dislikes must be based on the reviews of this product ONLY. You can find those reviews between the
<Product Reviews> and </Product Reviews> tags.
- If the reviews give you nothing to report as a like (or as a dislike), return the single entry 'None' for that list.

<Product Reviews>
{review_context}
</Product Reviews>
"""
requirements.txt CHANGED
@@ -4,3 +4,5 @@ torch
4
  pandas
5
  gradio
6
  plotly
 
 
 
4
  pandas
5
  gradio
6
  plotly
7
+ openai
8
+ instructor