invincible-jha committed
Commit beebdff · verified · 1 parent: 81beee5

Upload app.py

Files changed (1):
  1. app.py +39 -107
app.py CHANGED
@@ -22,6 +22,41 @@ emotion_tokenizer = None
  emotion_model = None
  clinical_analyzer = None
  
+ def load_models():
+     """Initialize and load all required machine learning models.
+ 
+     This function handles the loading of both the Whisper speech recognition model
+     and the emotion detection model. It includes proper error handling and
+     device management for optimal performance.
+ 
+     Returns:
+         bool: True if all models loaded successfully, False otherwise
+     """
+     global processor, whisper_model, emotion_tokenizer, emotion_model
+ 
+     try:
+         # Load the Whisper model for speech recognition
+         print("Loading Whisper model...")
+         processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+         whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+ 
+         # Load the emotion detection model
+         print("Loading emotion model...")
+         emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
+         emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
+ 
+         # Move models to CPU for consistent performance
+         device = "cpu"
+         whisper_model.to(device)
+         emotion_model.to(device)
+ 
+         print("Models loaded successfully!")
+         return True
+ 
+     except Exception as e:
+         print(f"Error loading models: {str(e)}")
+         return False
+ 
  class ClinicalVoiceAnalyzer:
      """Clinical voice analysis system using Anthropic's Claude for interpretation."""
  
@@ -37,16 +72,7 @@ class ClinicalVoiceAnalyzer:
          print("Clinical analyzer initialized successfully")
  
      def analyze_voice_metrics(self, features: Dict, emotions: Dict, transcription: str) -> str:
-         """Perform comprehensive clinical analysis of voice characteristics.
- 
-         Args:
-             features: Dictionary of extracted voice features
-             emotions: Dictionary of emotion scores
-             transcription: Text transcription of the speech
- 
-         Returns:
-             str: Detailed clinical analysis
-         """
+         """Perform comprehensive clinical analysis of voice characteristics."""
          try:
              prompt = self._create_analysis_prompt(features, emotions, transcription)
              response = self.anthropic.messages.create(
@@ -63,11 +89,7 @@ class ClinicalVoiceAnalyzer:
              return self._generate_fallback_analysis(features, emotions)
  
      def _create_analysis_prompt(self, features: Dict, emotions: Dict, transcription: str) -> str:
-         """Create a detailed prompt for clinical analysis.
- 
-         Constructs a comprehensive prompt that includes all relevant voice metrics,
-         emotional patterns, and speech content for analysis.
-         """
+         """Create a detailed prompt for clinical analysis."""
          return f"""As a clinical voice analysis expert specializing in mental health assessment,
  provide a detailed psychological evaluation based on the following data:
  
@@ -120,94 +142,7 @@ Emotional Indication:
  Note: This is a basic analysis. For detailed clinical interpretation, please ensure API connectivity.
  """
  
- # Your existing functions (load_models, extract_prosodic_features, etc.) remain the same...
- 
- def analyze_audio(audio_input):
-     """Enhanced main function with clinical analysis integration."""
-     try:
-         if audio_input is None:
-             return "Please provide an audio input", None, None
- 
-         # Existing audio processing code...
-         if isinstance(audio_input, tuple):
-             audio_path = audio_input[0]
-         else:
-             audio_path = audio_input
- 
-         waveform, sr = librosa.load(audio_path, sr=16000, duration=30)
-         duration = len(waveform) / sr
- 
-         if duration < 0.5:
-             return "Audio too short. Please provide a recording of at least 0.5 seconds.", None, None
- 
-         features = extract_prosodic_features(waveform, sr)
-         if features is None:
-             return "Error extracting voice features. Please try recording again.", None, None
- 
-         feature_viz = create_feature_plots(features)
- 
-         # Speech recognition
-         inputs = processor(waveform, sampling_rate=sr, return_tensors="pt").input_features
-         with torch.no_grad():
-             predicted_ids = whisper_model.generate(inputs)
-         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
- 
-         # Emotion analysis
-         emotion_inputs = emotion_tokenizer(
-             transcription,
-             return_tensors="pt",
-             padding=True,
-             truncation=True,
-             max_length=512
-         )
- 
-         with torch.no_grad():
-             emotion_outputs = emotion_model(**emotion_inputs)
-             emotions = torch.nn.functional.softmax(emotion_outputs.logits, dim=-1)
- 
-         emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
-         emotion_scores = {
-             label: float(score)
-             for label, score in zip(emotion_labels, emotions[0].cpu().numpy())
-         }
- 
-         emotion_viz = create_emotion_plot(emotion_scores)
- 
-         # Generate clinical analysis
-         global clinical_analyzer
-         if clinical_analyzer is None:
-             clinical_analyzer = ClinicalVoiceAnalyzer()
- 
-         clinical_analysis = clinical_analyzer.analyze_voice_metrics(
-             features, emotion_scores, transcription
-         )
- 
-         # Create enhanced summary with clinical insights
-         summary = f"""Voice Analysis Summary:
- 
- Speech Content:
- {transcription}
- 
- Voice Characteristics:
- - Average Pitch: {features['pitch_mean']:.2f} Hz
- - Pitch Variation: {features['pitch_std']:.2f} Hz
- - Speech Rate (Tempo): {features['tempo']:.2f} BPM
- - Voice Energy: {features['energy_mean']:.4f}
- 
- Dominant Emotion: {max(emotion_scores.items(), key=lambda x: x[1])[0]}
- Emotion Confidence: {max(emotion_scores.values()):.2%}
- 
- Recording Duration: {duration:.2f} seconds
- 
- {clinical_analysis}
- """
- 
-         return summary, emotion_viz, feature_viz
- 
-     except Exception as e:
-         error_msg = f"Error in audio analysis: {str(e)}"
-         print(error_msg)
-         return error_msg, None, None
+ [Rest of your existing code for extract_prosodic_features, create_feature_plots, create_emotion_plot, and analyze_audio functions...]
  
  # Initialize the application with clinical analysis capability
  try:
@@ -219,7 +154,7 @@
      clinical_analyzer = ClinicalVoiceAnalyzer()
      print("Clinical analyzer initialized")
  
-     # Create Gradio interface with enhanced description
+     # Create Gradio interface
      demo = gr.Interface(
          fn=analyze_audio,
          inputs=gr.Audio(
@@ -257,12 +192,9 @@
      - Speak clearly and naturally
      - Keep recordings between 1-5 seconds
      - Maintain consistent volume
- 
-     Upload an audio file or record directly through your microphone.
      """
      )
  
-     # Launch the interface
  if __name__ == "__main__":
      demo.launch()
 
 
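For readers skimming the diff: a minimal, self-contained sketch of the startup pattern this commit introduces, where load_models() returns a bool that can gate building the Gradio interface. The loader below is a stub for illustration only; the real Whisper and emotion-model loading is in the diff above, and the gating check shown here is an assumption, not code from the commit.

# Illustrative sketch only; not part of the committed app.py.
# load_models() is stubbed; in app.py it loads openai/whisper-tiny and
# j-hartmann/emotion-english-distilroberta-base as shown in the diff.

def load_models() -> bool:
    """Stand-in loader mirroring the bool-returning contract of app.py's load_models()."""
    try:
        # Real from_pretrained(...) calls would go here.
        return True
    except Exception as exc:
        print(f"Error loading models: {exc}")
        return False

if __name__ == "__main__":
    if not load_models():
        raise SystemExit("Model loading failed; not starting the interface.")
    # Only after a successful load would the Gradio UI be built and launched, e.g.:
    # demo = gr.Interface(fn=analyze_audio, inputs=gr.Audio(type="filepath"), outputs=[...])
    # demo.launch()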