Zasha1 committed on
Commit 3470239 · verified · 1 Parent(s): b8ac19d

Update sentiment_analysis.py

Files changed (1)
  1. sentiment_analysis.py +81 -79
sentiment_analysis.py CHANGED
@@ -1,16 +1,14 @@
 import os
 import json
 import time
-import numpy as np
-import sounddevice as sd
+from speech_recognition import Recognizer, Microphone, AudioData, UnknownValueError, RequestError
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 from huggingface_hub import login
 from product_recommender import ProductRecommender
-from objection_handler import load_objections, check_objections
+from objection_handler import load_objections, check_objections  # Ensure check_objections is imported
 from objection_handler import ObjectionHandler
 from env_setup import config
 from sentence_transformers import SentenceTransformer
-from scipy.io.wavfile import write
 from dotenv import load_dotenv
 
 # Load environment variables
@@ -26,6 +24,9 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
 
+# Speech Recognition Setup
+recognizer = Recognizer()
+
 # Function to analyze sentiment
 def preprocess_text(text):
     """Preprocess text for better sentiment analysis."""
@@ -36,12 +37,12 @@ def analyze_sentiment(text):
     try:
         if not text.strip():
             return "NEUTRAL", 0.0
-
+
         processed_text = preprocess_text(text)
         result = sentiment_analyzer(processed_text)[0]
-
+
         print(f"Sentiment Analysis Result: {result}")
-
+
         # Map raw labels to sentiments
         sentiment_map = {
             'Very Negative': "NEGATIVE",
@@ -50,35 +51,14 @@ def analyze_sentiment(text):
             'Positive': "POSITIVE",
             'Very Positive': "POSITIVE"
         }
-
+
         sentiment = sentiment_map.get(result['label'], "NEUTRAL")
         return sentiment, result['score']
-
+
     except Exception as e:
         print(f"Error in sentiment analysis: {e}")
         return "NEUTRAL", 0.5
 
-def record_audio(duration=5, sample_rate=44100):
-    """Record audio for a specified duration."""
-    print("Recording audio...")
-    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
-    sd.wait()  # Wait for the recording to finish
-    print("Recording completed.")
-    return np.squeeze(audio)
-
-def transcribe_audio(audio, sample_rate=44100):
-    """Transcribe recorded audio using a speech-to-text API."""
-    try:
-        # Save audio to a temporary file for transcription
-        audio_file = "temp_audio.wav"
-        write(audio_file, sample_rate, audio)
-        # Call external transcription service (e.g., Whisper, AssemblyAI, or Google)
-        transcription = "Example transcription text from audio."  # Placeholder
-        return transcription
-    except Exception as e:
-        print(f"Error in audio transcription: {e}")
-        return ""
-
 def transcribe_with_chunks(objections_dict):
     """Perform real-time transcription with sentiment analysis."""
     print("Say 'start listening' to begin transcription. Say 'stop listening' to stop.")
@@ -88,64 +68,86 @@ def transcribe_with_chunks(objections_dict):
     chunk_start_time = time.time()
 
     # Initialize handlers with semantic search capabilities
-    objection_handler = ObjectionHandler("path_to_objections.csv")
-    product_recommender = ProductRecommender("path_to_recommendations.csv")
+    objection_handler = ObjectionHandler(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv")
+    product_recommender = ProductRecommender(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet2.csv")
 
     # Load the embeddings model once
     model = SentenceTransformer('all-MiniLM-L6-v2')
 
     try:
-        while True:
-            if not is_listening:
-                command = input("Enter 'start' to begin listening or 'stop' to quit: ").lower()
-                if command == "start":
-                    is_listening = True
-                    print("Listening started. Speak into the microphone.")
-                    continue
-                elif command == "stop":
-                    break
-
-            # Record and process audio in chunks
-            audio_data = record_audio(duration=5)
-            text = transcribe_audio(audio_data)
-            if text.strip():
-                print(f"Transcription: {text}")
-                current_chunk.append(text)
-
-            if time.time() - chunk_start_time > 3:
-                if current_chunk:
-                    chunk_text = " ".join(current_chunk)
-
-                    # Process sentiment
-                    sentiment, score = analyze_sentiment(chunk_text)
-                    chunks.append((chunk_text, sentiment, score))
-
-                    # Handle objections and recommendations
-                    query_embedding = model.encode([chunk_text])
-                    responses = objection_handler.handle_objection(chunk_text)
-                    recommendations = product_recommender.get_recommendations(chunk_text)
-
-                    # Print results
-                    if responses:
-                        print("\nSuggested Response:")
-                        for response in responses:
-                            print(f"→ {response}")
-                    if recommendations:
-                        print("\nRecommendations for this response:")
-                        for idx, rec in enumerate(recommendations, 1):
-                            print(f"{idx}. {rec}")
-
-                    print("\n")
-                    current_chunk = []
-                    chunk_start_time = time.time()
+        with Microphone() as source:
+            recognizer.adjust_for_ambient_noise(source)
+            print("Microphone calibrated. Please speak.")
+
+            while True:
+                print("Listening for speech...")
+                try:
+                    audio_data = recognizer.listen(source, timeout=5)
+                    text = recognizer.recognize_google(audio_data)
+
+                    if "start listening" in text.lower():
+                        is_listening = True
+                        print("Listening started. Speak into the microphone.")
+                        continue
+                    elif "stop listening" in text.lower():
+                        is_listening = False
+                        print("Listening stopped.")
+                        if current_chunk:
+                            chunk_text = " ".join(current_chunk)
+                            sentiment, score = analyze_sentiment(chunk_text)
+                            chunks.append((chunk_text, sentiment, score))
+                            current_chunk = []
+                        continue
+
+                    if is_listening and text.strip():
+                        print(f"Transcription: {text}")
+                        current_chunk.append(text)
+
+                        if time.time() - chunk_start_time > 3:
+                            if current_chunk:
+                                chunk_text = " ".join(current_chunk)
+
+                                # Always process sentiment
+                                sentiment, score = analyze_sentiment(chunk_text)
+                                chunks.append((chunk_text, sentiment, score))
+
+                                # Get objection responses and check similarity score
+                                query_embedding = model.encode([chunk_text])
+                                distances, indices = objection_handler.index.search(query_embedding, 1)
+
+                                # If similarity is high enough, show objection response
+                                if distances[0][0] < 1.5:  # Threshold for similarity
+                                    responses = objection_handler.handle_objection(chunk_text)
+                                    if responses:
+                                        print("\nSuggested Response:")
+                                        for response in responses:
+                                            print(f"→ {response}")
+
+                                # Get product recommendations and check similarity score
+                                distances, indices = product_recommender.index.search(query_embedding, 1)
+
+                                # If similarity is high enough, show recommendations
+                                if distances[0][0] < 1.5:  # Threshold for similarity
+                                    recommendations = product_recommender.get_recommendations(chunk_text)
+                                    if recommendations:
+                                        print(f"\nRecommendations for this response:")
+                                        for idx, rec in enumerate(recommendations, 1):
+                                            print(f"{idx}. {rec}")
+
+                                print("\n")
+                                current_chunk = []
+                                chunk_start_time = time.time()
+                except UnknownValueError:
+                    print("Could not understand the audio.")
+                except RequestError as e:
+                    print(f"Could not request results from Google Speech Recognition service; {e}")
 
     except KeyboardInterrupt:
         print("\nExiting...")
-
-    return chunks
+    return chunks
 
 if __name__ == "__main__":
-    objections_file_path = "path_to_objections.csv"
+    objections_file_path = r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv"
     objections_dict = load_objections(objections_file_path)
     transcribed_chunks = transcribe_with_chunks(objections_dict)
-    print("Final transcriptions and sentiments:", transcribed_chunks)
+    print("Final transcriptions and sentiments:", transcribed_chunks)
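Note: this commit swaps the sounddevice/scipy recording path for the SpeechRecognition library. Below is a minimal, self-contained sketch of that flow, assuming the `SpeechRecognition` package plus PyAudio for microphone access. Two small observations: `listen(..., timeout=5)` can also raise `WaitTimeoutError`, which the committed code does not catch, and the imported `AudioData` is never used.

```python
# Minimal sketch of the SpeechRecognition capture loop (Ctrl+C to stop).
import speech_recognition as sr

recognizer = sr.Recognizer()

with sr.Microphone() as source:
    recognizer.adjust_for_ambient_noise(source)  # calibrate for background noise
    while True:
        try:
            audio = recognizer.listen(source, timeout=5)  # wait up to 5 s for a phrase
            text = recognizer.recognize_google(audio)     # Google Web Speech API
            print(f"Heard: {text}")
        except sr.WaitTimeoutError:
            print("No speech started within the timeout.")
        except sr.UnknownValueError:
            print("Could not understand the audio.")
        except sr.RequestError as e:
            print(f"Speech service request failed; {e}")
```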
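The `analyze_sentiment()` mapping collapses a five-class model output into three buckets. A self-contained sketch of that collapse is below; the 'Negative' and 'Neutral' entries fall between the displayed hunks and the checkpoint behind `model_name` is not shown in this diff, so the stub result is illustrative.

```python
# Five-class label -> three-bucket sentiment, as in analyze_sentiment().
SENTIMENT_MAP = {
    'Very Negative': "NEGATIVE",
    'Negative': "NEGATIVE",   # assumed: these two entries sit between the shown hunks
    'Neutral': "NEUTRAL",
    'Positive': "POSITIVE",
    'Very Positive': "POSITIVE",
}

def collapse_label(result):
    """Collapse one transformers pipeline result dict to a coarse sentiment."""
    return SENTIMENT_MAP.get(result['label'], "NEUTRAL"), result['score']

# Mimics one element of a sentiment pipeline's output list.
print(collapse_label({'label': 'Very Positive', 'score': 0.97}))  # ('POSITIVE', 0.97)
```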
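The new `objection_handler.index.search(...)` calls assume each handler exposes a faiss-style index built over SentenceTransformer embeddings; the handler internals are not part of this diff. A sketch of the distance-gated lookup pattern under that assumption:

```python
# Distance-gated semantic lookup, mirroring the < 1.5 L2 threshold above.
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

corpus = ["Your product is too expensive.", "I need to think about it."]
embeddings = model.encode(corpus).astype("float32")

index = faiss.IndexFlatL2(embeddings.shape[1])  # exact L2 search
index.add(embeddings)

query = model.encode(["That price is way over our budget."]).astype("float32")
distances, indices = index.search(query, 1)

# Lower L2 distance means a closer match; 1.5 mirrors the commit's threshold.
if distances[0][0] < 1.5:
    print("Match:", corpus[indices[0][0]], "distance:", distances[0][0])
else:
    print("No sufficiently similar entry.")
```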
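One behavioral consequence of the restructured loop: a chunk is flushed only when a new utterance arrives after the 3-second window has elapsed, and the 'stop listening' branch flushes whatever is pending. An offline sketch of that flush logic, driven by explicit timestamps instead of a microphone (the helper name and stub analyzer are illustrative, not part of the commit):

```python
# Time-windowed chunk flushing, as in transcribe_with_chunks().
def flush_chunks(timed_transcripts, analyze, window=3.0):
    """timed_transcripts: iterable of (timestamp_seconds, text) pairs."""
    chunks, current, start = [], [], None
    for ts, text in timed_transcripts:
        if start is None:
            start = ts
        current.append(text)
        if ts - start > window:  # flush only once the window has elapsed
            chunk_text = " ".join(current)
            chunks.append((chunk_text, *analyze(chunk_text)))
            current, start = [], None
    if current:  # tail flush, mirroring the 'stop listening' branch
        chunk_text = " ".join(current)
        chunks.append((chunk_text, *analyze(chunk_text)))
    return chunks

stub = lambda text: ("NEUTRAL", 0.5)  # stands in for analyze_sentiment()
print(flush_chunks([(0.0, "hello"), (2.0, "there"), (4.0, "pricing is high")], stub))
# [('hello there pricing is high', 'NEUTRAL', 0.5)]
```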