Zasha1 committed
Commit 5126882 · verified · 1 Parent(s): 88456fa

Update sentiment_analysis.py

Files changed (1)
  1. sentiment_analysis.py +176 -176
sentiment_analysis.py CHANGED
@@ -1,177 +1,177 @@
- import os
- import json
- import time
- import pyaudio
- from vosk import Model, KaldiRecognizer
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
- from huggingface_hub import login
- from product_recommender import ProductRecommender
- from objection_handler import load_objections, check_objections  # Ensure check_objections is imported
- from objection_handler import ObjectionHandler
- from env_setup import config
- from sentence_transformers import SentenceTransformer
- from dotenv import load_dotenv
-
- # Load environment variables
- load_dotenv()
-
- # Initialize the ProductRecommender
- product_recommender = ProductRecommender(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet2.csv")
-
- # Hugging Face API setup
- huggingface_api_key = config["huggingface_api_key"]
- login(token=huggingface_api_key)
-
- # Sentiment Analysis Model
- model_name = "tabularisai/multilingual-sentiment-analysis"
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
-
- # Vosk Speech Recognition Model
- vosk_model_path = config["vosk_model_path"]
-
- if not vosk_model_path:
-     raise ValueError("Error: vosk_model_path is not set in the .env file.")
-
- try:
-     vosk_model = Model(vosk_model_path)
-     print("Vosk model loaded successfully.")
- except Exception as e:
-     raise ValueError(f"Failed to load Vosk model: {e}")
-
- recognizer = KaldiRecognizer(vosk_model, 16000)
- audio = pyaudio.PyAudio()
-
- stream = audio.open(format=pyaudio.paInt16,
-                     channels=1,
-                     rate=16000,
-                     input=True,
-                     frames_per_buffer=4000)
- stream.start_stream()
-
- # Function to analyze sentiment
- def preprocess_text(text):
-     """Preprocess text for better sentiment analysis."""
-     # Strip whitespace and convert to lowercase
-     processed = text.strip().lower()
-     return processed
-
- def preprocess_text(text):
-     """Preprocess text for better sentiment analysis."""
-     return text.strip().lower()
-
- def analyze_sentiment(text):
-     """Analyze sentiment of the text using Hugging Face model."""
-     try:
-         if not text.strip():
-             return "NEUTRAL", 0.0
-
-         processed_text = preprocess_text(text)
-         result = sentiment_analyzer(processed_text)[0]
-
-         print(f"Sentiment Analysis Result: {result}")
-
-         # Map raw labels to sentiments
-         sentiment_map = {
-             'Very Negative': "NEGATIVE",
-             'Negative': "NEGATIVE",
-             'Neutral': "NEUTRAL",
-             'Positive': "POSITIVE",
-             'Very Positive': "POSITIVE"
-         }
-
-         sentiment = sentiment_map.get(result['label'], "NEUTRAL")
-         return sentiment, result['score']
-
-     except Exception as e:
-         print(f"Error in sentiment analysis: {e}")
-         return "NEUTRAL", 0.5
-
- def transcribe_with_chunks(objections_dict):
-     """Perform real-time transcription with sentiment analysis."""
-     print("Say 'start listening' to begin transcription. Say 'stop listening' to stop.")
-     is_listening = False
-     chunks = []
-     current_chunk = []
-     chunk_start_time = time.time()
-
-     # Initialize handlers with semantic search capabilities
-     objection_handler = ObjectionHandler(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv")
-     product_recommender = ProductRecommender(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet2.csv")
-
-     # Load the embeddings model once
-     model = SentenceTransformer('all-MiniLM-L6-v2')
-
-     try:
-         while True:
-             data = stream.read(4000, exception_on_overflow=False)
-
-             if recognizer.AcceptWaveform(data):
-                 result = recognizer.Result()
-                 text = json.loads(result)["text"]
-
-                 if "start listening" in text.lower():
-                     is_listening = True
-                     print("Listening started. Speak into the microphone.")
-                     continue
-                 elif "stop listening" in text.lower():
-                     is_listening = False
-                     print("Listening stopped.")
-                     if current_chunk:
-                         chunk_text = " ".join(current_chunk)
-                         sentiment, score = analyze_sentiment(chunk_text)
-                         chunks.append((chunk_text, sentiment, score))
-                         current_chunk = []
-                     continue
-
-                 if is_listening and text.strip():
-                     print(f"Transcription: {text}")
-                     current_chunk.append(text)
-
-                     if time.time() - chunk_start_time > 3:
-                         if current_chunk:
-                             chunk_text = " ".join(current_chunk)
-
-                             # Always process sentiment
-                             sentiment, score = analyze_sentiment(chunk_text)
-                             chunks.append((chunk_text, sentiment, score))
-
-                             # Get objection responses and check similarity score
-                             query_embedding = model.encode([chunk_text])
-                             distances, indices = objection_handler.index.search(query_embedding, 1)
-
-                             # If similarity is high enough, show objection response
-                             if distances[0][0] < 1.5:  # Threshold for similarity
-                                 responses = objection_handler.handle_objection(chunk_text)
-                                 if responses:
-                                     print("\nSuggested Response:")
-                                     for response in responses:
-                                         print(f"→ {response}")
-
-                             # Get product recommendations and check similarity score
-                             distances, indices = product_recommender.index.search(query_embedding, 1)
-
-                             # If similarity is high enough, show recommendations
-                             if distances[0][0] < 1.5:  # Threshold for similarity
-                                 recommendations = product_recommender.get_recommendations(chunk_text)
-                                 if recommendations:
-                                     print(f"\nRecommendations for this response:")
-                                     for idx, rec in enumerate(recommendations, 1):
-                                         print(f"{idx}. {rec}")
-
-                             print("\n")
-                         current_chunk = []
-                         chunk_start_time = time.time()
-
-     except KeyboardInterrupt:
-         print("\nExiting...")
-         stream.stop_stream()
-
-     return chunks
-
- if __name__ == "__main__":
-     objections_file_path = r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv"
-     objections_dict = load_objections(objections_file_path)
-     transcribed_chunks = transcribe_with_chunks(objections_dict)
  print("Final transcriptions and sentiments:", transcribed_chunks)
 
+ import os
+ import json
+ import time
+ import pyaudio  # still needed: the microphone stream below is opened with PyAudio
+ import speech_recognition as sr
+ from vosk import Model, KaldiRecognizer
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+ from huggingface_hub import login
+ from product_recommender import ProductRecommender
+ from objection_handler import load_objections, check_objections  # Ensure check_objections is imported
+ from objection_handler import ObjectionHandler
+ from env_setup import config
+ from sentence_transformers import SentenceTransformer
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Initialize the ProductRecommender
+ product_recommender = ProductRecommender(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet2.csv")
+
+ # Hugging Face API setup
+ huggingface_api_key = config["huggingface_api_key"]
+ login(token=huggingface_api_key)
+
+ # Sentiment Analysis Model
+ model_name = "tabularisai/multilingual-sentiment-analysis"
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
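+ # The pipeline returns a list of dicts such as [{'label': 'Positive', 'score': 0.98}]
+ # (values illustrative); analyze_sentiment() below collapses the model's five
+ # labels into three coarse classes.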
+
+ # Vosk Speech Recognition Model
+ vosk_model_path = config["vosk_model_path"]
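+ # vosk_model_path should point at an unpacked Vosk model directory, e.g.
+ # "vosk-model-small-en-us-0.15" (an example model name, not necessarily the
+ # one configured here).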
+
+ if not vosk_model_path:
+     raise ValueError("Error: vosk_model_path is not set in the .env file.")
+
+ try:
+     vosk_model = Model(vosk_model_path)
+     print("Vosk model loaded successfully.")
+ except Exception as e:
+     raise ValueError(f"Failed to load Vosk model: {e}")
+
+ recognizer = KaldiRecognizer(vosk_model, 16000)
+ audio = pyaudio.PyAudio()
+
+ stream = audio.open(format=pyaudio.paInt16,
+                     channels=1,
+                     rate=16000,
+                     input=True,
+                     frames_per_buffer=4000)
+ stream.start_stream()
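+ # Note: the stream must stay mono / 16-bit / 16 kHz to match the sample rate
+ # the KaldiRecognizer was constructed with above.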
+
+ # Text preprocessing helper used by analyze_sentiment below
+ def preprocess_text(text):
+     """Preprocess text for better sentiment analysis: strip whitespace and lowercase."""
+     return text.strip().lower()
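+ # e.g. preprocess_text("  Hello World  ") -> "hello world"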
+
+ def analyze_sentiment(text):
+     """Analyze sentiment of the text using Hugging Face model."""
+     try:
+         if not text.strip():
+             return "NEUTRAL", 0.0
+
+         processed_text = preprocess_text(text)
+         result = sentiment_analyzer(processed_text)[0]
+
+         print(f"Sentiment Analysis Result: {result}")
+
+         # Map raw labels to sentiments
+         sentiment_map = {
+             'Very Negative': "NEGATIVE",
+             'Negative': "NEGATIVE",
+             'Neutral': "NEUTRAL",
+             'Positive': "POSITIVE",
+             'Very Positive': "POSITIVE"
+         }
+
+         sentiment = sentiment_map.get(result['label'], "NEUTRAL")
+         return sentiment, result['score']
+
+     except Exception as e:
+         print(f"Error in sentiment analysis: {e}")
+         return "NEUTRAL", 0.5
+
+ def transcribe_with_chunks(objections_dict):
+     """Perform real-time transcription with sentiment analysis."""
+     print("Say 'start listening' to begin transcription. Say 'stop listening' to stop.")
+     is_listening = False
+     chunks = []
+     current_chunk = []
+     chunk_start_time = time.time()
+
+     # Initialize handlers with semantic search capabilities
+     objection_handler = ObjectionHandler(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv")
+     product_recommender = ProductRecommender(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet2.csv")
+
+     # Load the embeddings model once
+     model = SentenceTransformer('all-MiniLM-L6-v2')
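+     # all-MiniLM-L6-v2 produces 384-dimensional embeddings; the same query
+     # vector is reused below for both the objection and product lookups.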
+
+     try:
+         while True:
+             data = stream.read(4000, exception_on_overflow=False)
+
+             if recognizer.AcceptWaveform(data):
+                 result = recognizer.Result()
+                 text = json.loads(result)["text"]
+
+                 if "start listening" in text.lower():
+                     is_listening = True
+                     print("Listening started. Speak into the microphone.")
+                     continue
+                 elif "stop listening" in text.lower():
+                     is_listening = False
+                     print("Listening stopped.")
+                     if current_chunk:
+                         chunk_text = " ".join(current_chunk)
+                         sentiment, score = analyze_sentiment(chunk_text)
+                         chunks.append((chunk_text, sentiment, score))
+                         current_chunk = []
+                     continue
+
+                 if is_listening and text.strip():
+                     print(f"Transcription: {text}")
+                     current_chunk.append(text)
+
+                     if time.time() - chunk_start_time > 3:
+                         if current_chunk:
+                             chunk_text = " ".join(current_chunk)
+
+                             # Always process sentiment
+                             sentiment, score = analyze_sentiment(chunk_text)
+                             chunks.append((chunk_text, sentiment, score))
+
+                             # Get objection responses and check similarity score
+                             query_embedding = model.encode([chunk_text])
+                             distances, indices = objection_handler.index.search(query_embedding, 1)
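+                             # index.search returns (distances, indices) arrays; assuming an
+                             # L2 FAISS index, smaller distances mean closer matches, so the
+                             # 1.5 cutoffs below are loose "close enough" heuristics.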
+
+                             # If similarity is high enough, show objection response
+                             if distances[0][0] < 1.5:  # Threshold for similarity
+                                 responses = objection_handler.handle_objection(chunk_text)
+                                 if responses:
+                                     print("\nSuggested Response:")
+                                     for response in responses:
+                                         print(f"→ {response}")
+
+                             # Get product recommendations and check similarity score
+                             distances, indices = product_recommender.index.search(query_embedding, 1)
+
+                             # If similarity is high enough, show recommendations
+                             if distances[0][0] < 1.5:  # Threshold for similarity
+                                 recommendations = product_recommender.get_recommendations(chunk_text)
+                                 if recommendations:
+                                     print("\nRecommendations for this response:")
+                                     for idx, rec in enumerate(recommendations, 1):
+                                         print(f"{idx}. {rec}")
+
+                             print("\n")
+                         current_chunk = []
+                         chunk_start_time = time.time()
+
+     except KeyboardInterrupt:
+         print("\nExiting...")
+         stream.stop_stream()
+         stream.close()
+         audio.terminate()  # release the audio device on exit
+
+     return chunks
+
+ if __name__ == "__main__":
+     objections_file_path = r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv"
+     objections_dict = load_objections(objections_file_path)
+     transcribed_chunks = transcribe_with_chunks(objections_dict)
  print("Final transcriptions and sentiments:", transcribed_chunks)