Spaces:

invincible-jha
/

MentalHealthVocalBiomarkers

Sleeping

App Files Community

invincible-jha committed on Nov 18, 2024

Commit

784383b

verified ·

1 Parent(s): 9ec2a83

Upload app.py

Browse files

Files changed (1) hide show

app.py +110 -48

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import plotly.graph_objects as go
 class ModelManager:
     def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.models = {}
         self.tokenizers = {}
         self.processors = {}
@@ -15,12 +15,23 @@ class ModelManager:
     def load_models(self):
         print("Loading Whisper model...")
-        self.processors['whisper'] = WhisperProcessor.from_pretrained("openai/whisper-base")
-        self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to(self.device)
         print("Loading emotion model...")
-        self.tokenizers['emotion'] = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
-        self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base").to(self.device)
 class AudioProcessor:
     def __init__(self):
@@ -28,14 +39,22 @@ class AudioProcessor:
         self.n_mfcc = 13
     def process_audio(self, audio_path):
-        waveform, sr = librosa.load(audio_path, sr=self.sample_rate)
-        return waveform, self._extract_features(waveform)
     def _extract_features(self, waveform):
-        return {
-            'mfcc': librosa.feature.mfcc(y=waveform, sr=self.sample_rate, n_mfcc=self.n_mfcc),
-            'energy': librosa.feature.rms(y=waveform)[0]
-        }
 class Analyzer:
     def __init__(self):
@@ -45,45 +64,80 @@ class Analyzer:
         print("Analyzer initialization complete")
     def analyze(self, audio_path):
-        print(f"Processing audio file: {audio_path}")
-        waveform, features = self.audio_processor.process_audio(audio_path)
-        print("Transcribing audio...")
-        inputs = self.model_manager.processors['whisper'](waveform, return_tensors="pt").input_features.to(self.model_manager.device)
-        predicted_ids = self.model_manager.models['whisper'].generate(inputs)
-        transcription = self.model_manager.processors['whisper'].batch_decode(predicted_ids, skip_special_tokens=True)[0]
-        print("Analyzing emotions...")
-        inputs = self.model_manager.tokenizers['emotion'](transcription, return_tensors="pt", padding=True, truncation=True)
-        outputs = self.model_manager.models['emotion'](**inputs)
-        emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)
-        emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
-        emotion_scores = {
-            label: float(score)
-            for label, score in zip(emotion_labels, emotions[0])
-        }
-        return {
-            'transcription': transcription,
-            'emotions': emotion_scores
-        }
 def create_emotion_plot(emotions):
-    fig = go.Figure(data=[
-        go.Bar(x=list(emotions.keys()), y=list(emotions.values()))
-    ])
-    fig.update_layout(
-        title='Emotion Analysis',
-        yaxis_range=[0, 1]
-    )
-    return fig.to_html()
 print("Initializing application...")
 analyzer = Analyzer()
 def process_audio(audio_file):
     try:
         print(f"Processing audio file: {audio_file}")
         results = analyzer.analyze(audio_file)
@@ -92,20 +146,28 @@ def process_audio(audio_file):
             create_emotion_plot(results['emotions'])
         )
     except Exception as e:
-        print(f"Error processing audio: {str(e)}")
-        return str(e), "Error in analysis"
 print("Creating Gradio interface...")
 interface = gr.Interface(
     fn=process_audio,
-    inputs=gr.Audio(sources=["microphone", "upload"]),  # Fixed parameter
     outputs=[
         gr.Textbox(label="Transcription"),
         gr.HTML(label="Emotion Analysis")
     ],
     title="Vocal Biomarker Analysis",
-    description="Analyze voice for emotional indicators"
 )
-print("Launching application...")
-interface.launch(share=False)

 class ModelManager:
     def __init__(self):
+        self.device = torch.device("cpu")
         self.models = {}
         self.tokenizers = {}
         self.processors = {}
     def load_models(self):
         print("Loading Whisper model...")
+        self.processors['whisper'] = WhisperProcessor.from_pretrained(
+            "openai/whisper-base",
+            device_map="cpu"
+        )
+        self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained(
+            "openai/whisper-base",
+            device_map="cpu"
+        )
         print("Loading emotion model...")
+        self.tokenizers['emotion'] = AutoTokenizer.from_pretrained(
+            "j-hartmann/emotion-english-distilroberta-base"
+        )
+        self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained(
+            "j-hartmann/emotion-english-distilroberta-base",
+            device_map="cpu"
+        )
 class AudioProcessor:
     def __init__(self):
         self.n_mfcc = 13
     def process_audio(self, audio_path):
+        try:
+            waveform, sr = librosa.load(audio_path, sr=self.sample_rate)
+            return waveform, self._extract_features(waveform)
+        except Exception as e:
+            print(f"Error processing audio: {str(e)}")
+            raise
     def _extract_features(self, waveform):
+        try:
+            return {
+                'mfcc': librosa.feature.mfcc(y=waveform, sr=self.sample_rate, n_mfcc=self.n_mfcc),
+                'energy': librosa.feature.rms(y=waveform)[0]
+            }
+        except Exception as e:
+            print(f"Error extracting features: {str(e)}")
+            raise
 class Analyzer:
     def __init__(self):
         print("Analyzer initialization complete")
     def analyze(self, audio_path):
+        try:
+            print(f"Processing audio file: {audio_path}")
+            waveform, features = self.audio_processor.process_audio(audio_path)
+            print("Transcribing audio...")
+            inputs = self.model_manager.processors['whisper'](
+                waveform,
+                return_tensors="pt"
+            ).input_features
+            predicted_ids = self.model_manager.models['whisper'].generate(inputs)
+            transcription = self.model_manager.processors['whisper'].batch_decode(
+                predicted_ids,
+                skip_special_tokens=True
+            )[0]
+            print("Analyzing emotions...")
+            inputs = self.model_manager.tokenizers['emotion'](
+                transcription,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            )
+            outputs = self.model_manager.models['emotion'](**inputs)
+            emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
+            emotion_scores = {
+                label: float(score)
+                for label, score in zip(emotion_labels, emotions[0])
+            }
+            return {
+                'transcription': transcription,
+                'emotions': emotion_scores
+            }
+        except Exception as e:
+            print(f"Error in analysis: {str(e)}")
+            raise
 def create_emotion_plot(emotions):
+    try:
+        fig = go.Figure(data=[
+            go.Bar(
+                x=list(emotions.keys()),
+                y=list(emotions.values()),
+                marker_color='rgb(55, 83, 109)'
+            )
+        ])
+        fig.update_layout(
+            title='Emotion Analysis',
+            xaxis_title='Emotion',
+            yaxis_title='Score',
+            yaxis_range=[0, 1],
+            template='plotly_white',
+            height=400
+        )
+        return fig.to_html(include_plotlyjs=True)
+    except Exception as e:
+        print(f"Error creating plot: {str(e)}")
+        return "Error creating visualization"
 print("Initializing application...")
 analyzer = Analyzer()
 def process_audio(audio_file):
     try:
+        if audio_file is None:
+            return "No audio file provided", "Please provide an audio file"
         print(f"Processing audio file: {audio_file}")
         results = analyzer.analyze(audio_file)
             create_emotion_plot(results['emotions'])
         )
     except Exception as e:
+        error_msg = f"Error processing audio: {str(e)}"
+        print(error_msg)
+        return error_msg, "Error in analysis"
 print("Creating Gradio interface...")
 interface = gr.Interface(
     fn=process_audio,
+    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
     outputs=[
         gr.Textbox(label="Transcription"),
         gr.HTML(label="Emotion Analysis")
     ],
     title="Vocal Biomarker Analysis",
+    description="Analyze voice for emotional indicators",
+    examples=[],
+    cache_examples=False
 )
+if __name__ == "__main__":
+    print("Launching application...")
+    interface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )