Spaces:

invincible-jha
/

MentalHealthVocalBiomarkers

Sleeping

App Files Files Community

invincible-jha committed on Nov 27, 2024

Commit

beebdff

verified ·

1 Parent(s): 81beee5

Upload app.py

Browse files

Files changed (1) hide show

app.py +39 -107

app.py CHANGED Viewed

@@ -22,6 +22,41 @@ emotion_tokenizer = None
 emotion_model = None
 clinical_analyzer = None
 class ClinicalVoiceAnalyzer:
     """Clinical voice analysis system using Anthropic's Claude for interpretation."""
@@ -37,16 +72,7 @@ class ClinicalVoiceAnalyzer:
         print("Clinical analyzer initialized successfully")
     def analyze_voice_metrics(self, features: Dict, emotions: Dict, transcription: str) -> str:
-        """Perform comprehensive clinical analysis of voice characteristics.
-        Args:
-            features: Dictionary of extracted voice features
-            emotions: Dictionary of emotion scores
-            transcription: Text transcription of the speech
-        Returns:
-            str: Detailed clinical analysis
-        """
         try:
             prompt = self._create_analysis_prompt(features, emotions, transcription)
             response = self.anthropic.messages.create(
@@ -63,11 +89,7 @@ class ClinicalVoiceAnalyzer:
             return self._generate_fallback_analysis(features, emotions)
     def _create_analysis_prompt(self, features: Dict, emotions: Dict, transcription: str) -> str:
-        """Create a detailed prompt for clinical analysis.
-        Constructs a comprehensive prompt that includes all relevant voice metrics,
-        emotional patterns, and speech content for analysis.
-        """
         return f"""As a clinical voice analysis expert specializing in mental health assessment,
         provide a detailed psychological evaluation based on the following data:
@@ -120,94 +142,7 @@ Emotional Indication:
 Note: This is a basic analysis. For detailed clinical interpretation, please ensure API connectivity.
 """
-# Your existing functions (load_models, extract_prosodic_features, etc.) remain the same...
-def analyze_audio(audio_input):
-    """Run the voice-analysis pipeline on one recording and build a report.
-    The recording is loaded at 16 kHz (at most 30 seconds), prosodic features
-    are extracted and plotted, the speech is transcribed with Whisper, the
-    transcription is scored by the emotion classifier, and the results are
-    passed to the clinical analyzer for interpretation.
-    Args:
-        audio_input: None, a path accepted by librosa.load, or a tuple whose
-            first element is such a path.
-    Returns:
-        tuple: (summary_text, emotion_plot, feature_plot). On invalid input or
-        any failure the first element is a user-facing message and the two
-        plots are None.
-    """
-    global clinical_analyzer
-    try:
-        if audio_input is None:
-            return "Please provide an audio input", None, None
-        # A tuple input carries the path in its first element.
-        if isinstance(audio_input, tuple):
-            audio_path = audio_input[0]
-        else:
-            audio_path = audio_input
-        waveform, sr = librosa.load(audio_path, sr=16000, duration=30)
-        duration = len(waveform) / sr
-        if duration < 0.5:
-            return "Audio too short. Please provide a recording of at least 0.5 seconds.", None, None
-        features = extract_prosodic_features(waveform, sr)
-        if features is None:
-            return "Error extracting voice features. Please try recording again.", None, None
-        feature_viz = create_feature_plots(features)
-        # Speech recognition
-        inputs = processor(waveform, sampling_rate=sr, return_tensors="pt").input_features
-        with torch.no_grad():
-            predicted_ids = whisper_model.generate(inputs)
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-        # Emotion analysis (text-based: the classifier scores the transcription)
-        emotion_inputs = emotion_tokenizer(
-            transcription,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=512
-        )
-        with torch.no_grad():
-            emotion_outputs = emotion_model(**emotion_inputs)
-        emotions = torch.nn.functional.softmax(emotion_outputs.logits, dim=-1)
-        # Read the label names from the model's own config instead of a
-        # hand-written list. The j-hartmann/emotion-english-distilroberta-base
-        # checkpoint has seven classes (it includes "disgust"); zipping its
-        # scores against a six-item list shifted every score after "anger"
-        # onto the wrong label and dropped the last one.
-        id2label = emotion_model.config.id2label
-        emotion_scores = {
-            id2label[index]: float(score)
-            for index, score in enumerate(emotions[0].cpu().numpy())
-        }
-        emotion_viz = create_emotion_plot(emotion_scores)
-        # Generate clinical analysis, creating the analyzer on first use
-        if clinical_analyzer is None:
-            clinical_analyzer = ClinicalVoiceAnalyzer()
-        clinical_analysis = clinical_analyzer.analyze_voice_metrics(
-            features, emotion_scores, transcription
-        )
-        # Create enhanced summary with clinical insights
-        summary = f"""Voice Analysis Summary:
-Speech Content:
-{transcription}
-Voice Characteristics:
-- Average Pitch: {features['pitch_mean']:.2f} Hz
-- Pitch Variation: {features['pitch_std']:.2f} Hz
-- Speech Rate (Tempo): {features['tempo']:.2f} BPM
-- Voice Energy: {features['energy_mean']:.4f}
-Dominant Emotion: {max(emotion_scores.items(), key=lambda x: x[1])[0]}
-Emotion Confidence: {max(emotion_scores.values()):.2%}
-Recording Duration: {duration:.2f} seconds
-{clinical_analysis}
-"""
-        return summary, emotion_viz, feature_viz
-    except Exception as e:
-        # Top-level UI boundary: report the failure to the user instead of
-        # letting the interface crash.
-        error_msg = f"Error in audio analysis: {str(e)}"
-        print(error_msg)
-        return error_msg, None, None
 # Initialize the application with clinical analysis capability
 try:
@@ -219,7 +154,7 @@ try:
     clinical_analyzer = ClinicalVoiceAnalyzer()
     print("Clinical analyzer initialized")
-    # Create Gradio interface with enhanced description
     demo = gr.Interface(
         fn=analyze_audio,
         inputs=gr.Audio(
@@ -257,12 +192,9 @@ try:
         - Speak clearly and naturally
         - Keep recordings between 1-5 seconds
         - Maintain consistent volume
-        Upload an audio file or record directly through your microphone.
         """
     )
-    # Launch the interface
     if __name__ == "__main__":
         demo.launch()

 emotion_model = None
 clinical_analyzer = None
+def load_models():
+    """Load the speech-recognition and emotion models into module globals.
+    Fetches the Whisper processor and model plus the emotion tokenizer and
+    classifier, stores them in ``processor``, ``whisper_model``,
+    ``emotion_tokenizer`` and ``emotion_model``, and places both models on
+    the CPU. Any exception is printed rather than propagated.
+    Returns:
+        bool: True if every step succeeded, False if any step raised.
+    """
+    global processor, whisper_model, emotion_tokenizer, emotion_model
+    whisper_checkpoint = "openai/whisper-tiny"
+    emotion_checkpoint = "j-hartmann/emotion-english-distilroberta-base"
+    target_device = "cpu"
+    try:
+        # Speech recognition: processor first, then the generation model
+        print("Loading Whisper model...")
+        processor = WhisperProcessor.from_pretrained(whisper_checkpoint)
+        whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_checkpoint)
+        # Text emotion classifier and its tokenizer
+        print("Loading emotion model...")
+        emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_checkpoint)
+        emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_checkpoint)
+        # Both models run on the same device
+        for loaded_model in (whisper_model, emotion_model):
+            loaded_model.to(target_device)
+        print("Models loaded successfully!")
+        return True
+    except Exception as load_error:
+        print(f"Error loading models: {str(load_error)}")
+        return False
 class ClinicalVoiceAnalyzer:
     """Clinical voice analysis system using Anthropic's Claude for interpretation."""
         print("Clinical analyzer initialized successfully")
     def analyze_voice_metrics(self, features: Dict, emotions: Dict, transcription: str) -> str:
+        """Perform comprehensive clinical analysis of voice characteristics."""
         try:
             prompt = self._create_analysis_prompt(features, emotions, transcription)
             response = self.anthropic.messages.create(
             return self._generate_fallback_analysis(features, emotions)
     def _create_analysis_prompt(self, features: Dict, emotions: Dict, transcription: str) -> str:
+        """Create a detailed prompt for clinical analysis."""
         return f"""As a clinical voice analysis expert specializing in mental health assessment,
         provide a detailed psychological evaluation based on the following data:
 Note: This is a basic analysis. For detailed clinical interpretation, please ensure API connectivity.
 """
+[Rest of your existing code for extract_prosodic_features, create_feature_plots, create_emotion_plot, and analyze_audio functions...]
 # Initialize the application with clinical analysis capability
 try:
     clinical_analyzer = ClinicalVoiceAnalyzer()
     print("Clinical analyzer initialized")
+    # Create Gradio interface
     demo = gr.Interface(
         fn=analyze_audio,
         inputs=gr.Audio(
         - Speak clearly and naturally
         - Keep recordings between 1-5 seconds
         - Maintain consistent volume
         """
     )
     if __name__ == "__main__":
         demo.launch()