invincible-jha
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,10 @@ from dotenv import load_dotenv
|
|
15 |
# Load environment variables
|
16 |
load_dotenv()
|
17 |
|
|
|
|
|
|
|
|
|
18 |
# Suppress warnings
|
19 |
warnings.filterwarnings('ignore')
|
20 |
|
@@ -30,17 +34,26 @@ def load_models():
|
|
30 |
global processor, whisper_model, emotion_tokenizer, emotion_model
|
31 |
|
32 |
try:
|
33 |
-
# Load Whisper model
|
34 |
print("Loading Whisper model...")
|
35 |
-
processor = WhisperProcessor.from_pretrained(
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
# Load emotion model
|
39 |
print("Loading emotion model...")
|
40 |
-
emotion_tokenizer = AutoTokenizer.from_pretrained(
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
# Set device
|
44 |
device = "cpu"
|
45 |
whisper_model.to(device)
|
46 |
emotion_model.to(device)
|
@@ -136,75 +149,147 @@ def extract_prosodic_features(waveform, sr):
|
|
136 |
return None
|
137 |
|
138 |
class ClinicalVoiceAnalyzer:
|
139 |
-
"""
|
140 |
|
141 |
def __init__(self):
|
142 |
"""Initialize analyzer with API and reference ranges."""
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
def analyze_voice_metrics(self, features, emotions, transcription):
|
153 |
-
"""
|
154 |
try:
|
|
|
|
|
|
|
155 |
prompt = self._create_clinical_prompt(features, emotions, transcription)
|
|
|
|
|
156 |
response = self.anthropic.messages.create(
|
157 |
model=self.model,
|
158 |
max_tokens=1000,
|
159 |
-
messages=[{
|
|
|
|
|
|
|
|
|
160 |
)
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
except Exception as e:
|
163 |
print(f"Clinical analysis error: {e}")
|
164 |
return self._generate_backup_analysis(features, emotions)
|
165 |
|
166 |
def _create_clinical_prompt(self, features, emotions, transcription):
|
167 |
-
"""Create clinical analysis
|
168 |
-
|
169 |
|
170 |
-
Voice
|
171 |
-
- Pitch: {features['pitch_mean']:.2f} Hz (Normal: {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)
|
172 |
- Pitch Variation: {features['pitch_std']:.2f} Hz
|
173 |
-
- Speech Rate: {features['tempo']:.2f} BPM (Normal: {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)
|
174 |
-
- Voice Energy: {features['energy_mean']:.4f}
|
175 |
|
176 |
-
|
177 |
{', '.join(f'{emotion}: {score:.1%}' for emotion, score in emotions.items())}
|
178 |
|
179 |
Speech Content:
|
180 |
"{transcription}"
|
181 |
|
182 |
-
|
183 |
-
1.
|
184 |
-
2.
|
185 |
-
3.
|
186 |
-
4.
|
187 |
-
5.
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
def _format_analysis(self, analysis):
|
190 |
-
"""Format clinical analysis output."""
|
191 |
return f"\nClinical Assessment:\n{analysis}"
|
192 |
|
193 |
def _generate_backup_analysis(self, features, emotions):
|
194 |
-
"""Generate
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
- Pitch Status: {pitch_status} ({features['pitch_mean']:.2f} Hz)
|
205 |
-
- Speech Rate: {features['tempo']:.2f} BPM
|
206 |
-
- Voice Energy Level: {features['energy_mean']:.4f}
|
207 |
-
- Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
|
209 |
def create_feature_plots(features):
|
210 |
"""Create visualizations for voice features."""
|
@@ -334,10 +419,12 @@ def analyze_audio(audio_input):
|
|
334 |
global clinical_analyzer
|
335 |
if clinical_analyzer is None:
|
336 |
clinical_analyzer = ClinicalVoiceAnalyzer()
|
337 |
-
|
|
|
338 |
clinical_analysis = clinical_analyzer.analyze_voice_metrics(
|
339 |
features, emotion_scores, transcription
|
340 |
)
|
|
|
341 |
|
342 |
# Create summary with fixed string formatting
|
343 |
summary = f"""Voice Analysis Summary:
|
@@ -369,9 +456,11 @@ Recording Duration: {duration:.2f} seconds
|
|
369 |
try:
|
370 |
print("===== Application Startup =====")
|
371 |
|
|
|
372 |
if not load_models():
|
373 |
raise RuntimeError("Model loading failed")
|
374 |
|
|
|
375 |
clinical_analyzer = ClinicalVoiceAnalyzer()
|
376 |
print("Clinical analyzer initialized")
|
377 |
|
@@ -428,10 +517,10 @@ Upload an audio file or record directly through your microphone."""
|
|
428 |
|
429 |
if __name__ == "__main__":
|
430 |
demo.launch(
|
431 |
-
server_name="0.0.0.0",
|
432 |
-
server_port=7860,
|
433 |
-
share=False,
|
434 |
-
debug=False
|
435 |
)
|
436 |
|
437 |
except Exception as e:
|
|
|
15 |
# Load environment variables
|
16 |
load_dotenv()
|
17 |
|
18 |
+
# Get API tokens.
# No placeholder defaults: a made-up default is truthy, so "is the key set?"
# checks can never fail, and a fake Hugging Face token is sent with every
# download request instead of falling back to anonymous access.
# `or None` also normalizes an empty-string variable to "not configured".
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY') or None
HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN') or None
|
21 |
+
|
22 |
# Suppress warnings
|
23 |
warnings.filterwarnings('ignore')
|
24 |
|
|
|
34 |
global processor, whisper_model, emotion_tokenizer, emotion_model
|
35 |
|
36 |
try:
|
|
|
37 |
print("Loading Whisper model...")
|
38 |
+
processor = WhisperProcessor.from_pretrained(
|
39 |
+
"openai/whisper-tiny",
|
40 |
+
use_auth_token=HUGGINGFACE_TOKEN
|
41 |
+
)
|
42 |
+
whisper_model = WhisperForConditionalGeneration.from_pretrained(
|
43 |
+
"openai/whisper-tiny",
|
44 |
+
use_auth_token=HUGGINGFACE_TOKEN
|
45 |
+
)
|
46 |
|
|
|
47 |
print("Loading emotion model...")
|
48 |
+
emotion_tokenizer = AutoTokenizer.from_pretrained(
|
49 |
+
"j-hartmann/emotion-english-distilroberta-base",
|
50 |
+
use_auth_token=HUGGINGFACE_TOKEN
|
51 |
+
)
|
52 |
+
emotion_model = AutoModelForSequenceClassification.from_pretrained(
|
53 |
+
"j-hartmann/emotion-english-distilroberta-base",
|
54 |
+
use_auth_token=HUGGINGFACE_TOKEN
|
55 |
+
)
|
56 |
|
|
|
57 |
device = "cpu"
|
58 |
whisper_model.to(device)
|
59 |
emotion_model.to(device)
|
|
|
149 |
return None
|
150 |
|
151 |
class ClinicalVoiceAnalyzer:
    """Analyze voice characteristics for psychological indicators.

    The analyzer sends the extracted voice features, emotion scores and the
    transcription to the Anthropic Messages API. Whenever the API client is
    unavailable, the request fails, or the reply contains no text, it falls
    back to a rule-based report built from fixed reference ranges.
    """

    # Template placeholder that must never be treated as a real API key.
    _PLACEHOLDER_API_KEY = 'your_anthropic_api_key'

    def __init__(self):
        """Initialize analyzer with API and reference ranges.

        Never raises: if the API client cannot be created, the error is
        printed and ``self.anthropic`` is set to ``None`` so that callers get
        the rule-based fallback report instead.
        """
        # Set these before touching the API client: the fallback report reads
        # reference_ranges, so it must exist even when client setup fails.
        self.model = "claude-3-opus-20240229"
        self.reference_ranges = {
            'pitch': {'min': 150, 'max': 400},
            'tempo': {'min': 90, 'max': 130},
            'energy': {'min': 0.01, 'max': 0.05}
        }
        self.anthropic = None

        try:
            if not ANTHROPIC_API_KEY or ANTHROPIC_API_KEY == self._PLACEHOLDER_API_KEY:
                raise ValueError("ANTHROPIC_API_KEY not found in environment variables")

            self.anthropic = Anthropic(api_key=ANTHROPIC_API_KEY)
            print("Clinical analyzer ready")
        except Exception as e:
            # Deliberate best-effort boundary: the app keeps working without
            # the API by using the backup analysis.
            print(f"Error initializing clinical analyzer: {e}")
            self.anthropic = None

    def analyze_voice_metrics(self, features, emotions, transcription):
        """Generate clinical insights from voice and emotion data.

        Args:
            features: Mapping with ``pitch_mean``, ``pitch_std``, ``tempo``
                and ``energy_mean`` numeric entries.
            emotions: Mapping of emotion label to score (formatted as a
                percentage).
            transcription: Transcribed speech text included in the prompt.

        Returns:
            A report string: the API's assessment when available, otherwise
            the rule-based backup analysis.
        """
        if not self.anthropic:
            return self._generate_backup_analysis(features, emotions)

        try:
            prompt = self._create_clinical_prompt(features, emotions, transcription)
            print("Sending analysis request to Anthropic API...")

            response = self.anthropic.messages.create(
                model=self.model,
                max_tokens=1000,
                messages=[{
                    "role": "user",
                    "content": prompt
                }],
                temperature=0.7
            )

            # response.content is a list of content blocks, not a string;
            # formatting it directly would print the list's repr.
            text = self._extract_response_text(getattr(response, 'content', None))
            if text:
                print("Received response from Anthropic API")
                return self._format_analysis(text)

            print("No valid response from API")
        except Exception as e:
            # Any API/network failure degrades to the backup report.
            print(f"Clinical analysis error: {e}")

        return self._generate_backup_analysis(features, emotions)

    @staticmethod
    def _extract_response_text(content):
        """Return the plain text carried by an API response's content.

        Accepts a plain string, or an iterable of content blocks exposing a
        ``text`` attribute (or a ``"text"`` key). Blocks without text are
        skipped. Returns an empty string when no text is present.
        """
        if not content:
            return ""
        if isinstance(content, str):
            return content.strip()

        parts = []
        for block in content:
            text = getattr(block, 'text', None)
            if text is None and isinstance(block, dict):
                text = block.get('text')
            if text:
                parts.append(text)
        return "\n".join(parts).strip()

    def _create_clinical_prompt(self, features, emotions, transcription):
        """Create detailed prompt for clinical analysis."""
        ranges = self.reference_ranges
        emotion_summary = ', '.join(
            f'{emotion}: {score:.1%}' for emotion, score in emotions.items()
        )

        prompt = "\n".join([
            "As a clinical voice analysis expert, provide a detailed psychological assessment based on the following data:",
            "",
            "Voice Characteristics Analysis:",
            f"- Pitch: {features['pitch_mean']:.2f} Hz (Normal range: {ranges['pitch']['min']}-{ranges['pitch']['max']} Hz)",
            f"- Pitch Variation: {features['pitch_std']:.2f} Hz",
            f"- Speech Rate: {features['tempo']:.2f} BPM (Normal range: {ranges['tempo']['min']}-{ranges['tempo']['max']} BPM)",
            f"- Voice Energy Level: {features['energy_mean']:.4f} (Normal range: {ranges['energy']['min']}-{ranges['energy']['max']})",
            "",
            "Emotional Analysis:",
            emotion_summary,
            "",
            "Speech Content:",
            f'"{transcription}"',
            "",
            "Please provide a comprehensive assessment including:",
            "1. Detailed voice characteristic analysis and what it indicates about mental state",
            "2. Assessment of emotional state based on both voice features and detected emotions",
            "3. Potential indicators of anxiety, depression, or other mental health concerns",
            "4. Evaluation of stress levels and emotional stability",
            "5. Specific recommendations for mental health professionals or further assessment if needed",
            "",
            "Base your analysis on established clinical research connecting voice biomarkers to psychological states.",
        ])

        print(f"Generated prompt length: {len(prompt)} characters")
        return prompt

    def _format_analysis(self, analysis):
        """Format the clinical analysis output."""
        return f"\nClinical Assessment:\n{analysis}"

    @staticmethod
    def _classify(value, bounds, low_label, high_label):
        """Label ``value`` against a ``{'min': ..., 'max': ...}`` range.

        Returns ``high_label`` above the range, ``low_label`` below it and
        ``"normal"`` otherwise (bounds inclusive).
        """
        if value > bounds['max']:
            return high_label
        if value < bounds['min']:
            return low_label
        return "normal"

    def _generate_backup_analysis(self, features, emotions):
        """Generate basic analysis when API is unavailable.

        Returns a rule-based report string. If the feature data is malformed
        (e.g. a missing key), returns a generic error message instead of
        raising.
        """
        try:
            ranges = self.reference_ranges
            pitch_status = self._classify(
                features['pitch_mean'], ranges['pitch'], "reduced", "elevated"
            )
            tempo_status = self._classify(
                features['tempo'], ranges['tempo'], "slow", "rapid"
            )
            energy_status = self._classify(
                features['energy_mean'], ranges['energy'], "low", "high"
            )

            # An empty emotion mapping must not discard the whole report.
            if emotions:
                label, score = max(emotions.items(), key=lambda item: item[1])
                emotion_line = f"- Primary Emotion: {label} ({score:.1%} confidence)"
            else:
                emotion_line = "- Primary Emotion: unavailable"

            return "\n".join([
                "",
                "Detailed Voice Analysis:",
                f"- Pitch Status: {pitch_status} ({features['pitch_mean']:.2f} Hz)",
                f"- Speech Rate: {features['tempo']:.2f} BPM ({tempo_status})",
                f"- Voice Energy Level: {features['energy_mean']:.4f} ({energy_status})",
                emotion_line,
                "",
                "Potential Indicators:",
                f"- Pitch: {self._interpret_pitch(features['pitch_mean'], pitch_status)}",
                f"- Rate: {self._interpret_tempo(features['tempo'], tempo_status)}",
                f"- Energy: {self._interpret_energy(features['energy_mean'], energy_status)}",
                "",
            ])
        except Exception as e:
            # Last-resort boundary: the UI should always get some string back.
            print(f"Error in backup analysis: {e}")
            return "Error generating analysis. Please try again."

    def _interpret_pitch(self, pitch, status):
        """Return a short interpretation for a pitch status label."""
        if status == "elevated":
            return "May indicate heightened stress or anxiety"
        if status == "reduced":
            return "Could suggest low energy or depressed mood"
        return "Within normal range, suggesting stable emotional state"

    def _interpret_tempo(self, tempo, status):
        """Return a short interpretation for a speech-rate status label."""
        if status == "rapid":
            return "May indicate anxiety or agitation"
        if status == "slow":
            return "Could suggest fatigue or low mood"
        return "Normal pacing indicates balanced emotional state"

    def _interpret_energy(self, energy, status):
        """Return a short interpretation for a voice-energy status label."""
        if status == "high":
            return "May indicate heightened emotional state or agitation"
        if status == "low":
            return "Could suggest reduced emotional expression or fatigue"
        return "Appropriate energy level suggests emotional stability"
|
293 |
|
294 |
def create_feature_plots(features):
|
295 |
"""Create visualizations for voice features."""
|
|
|
419 |
global clinical_analyzer
|
420 |
if clinical_analyzer is None:
|
421 |
clinical_analyzer = ClinicalVoiceAnalyzer()
|
422 |
+
|
423 |
+
print("Initiating clinical analysis...") # Debug log
|
424 |
clinical_analysis = clinical_analyzer.analyze_voice_metrics(
|
425 |
features, emotion_scores, transcription
|
426 |
)
|
427 |
+
print("Clinical analysis completed") # Debug log
|
428 |
|
429 |
# Create summary with fixed string formatting
|
430 |
summary = f"""Voice Analysis Summary:
|
|
|
456 |
try:
|
457 |
print("===== Application Startup =====")
|
458 |
|
459 |
+
# Load required models with authentication
|
460 |
if not load_models():
|
461 |
raise RuntimeError("Model loading failed")
|
462 |
|
463 |
+
# Initialize clinical analyzer with authentication
|
464 |
clinical_analyzer = ClinicalVoiceAnalyzer()
|
465 |
print("Clinical analyzer initialized")
|
466 |
|
|
|
517 |
|
518 |
if __name__ == "__main__":
|
519 |
demo.launch(
|
520 |
+
server_name="0.0.0.0", # Allow external access
|
521 |
+
server_port=7860, # Default Gradio port
|
522 |
+
share=False, # Disable public URL generation
|
523 |
+
debug=False # Disable debug mode in production
|
524 |
)
|
525 |
|
526 |
except Exception as e:
|