andromeda01111 committed
Commit 8080438 · verified · 1 Parent(s): 2b8003b

Update app.py

Files changed (1)
  1. app.py +23 -18
app.py CHANGED
@@ -1,3 +1,4 @@
+import gradio as gr
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -11,14 +12,11 @@ import numpy as np
 import pandas as pd
 import os
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-model_name_or_path = ""
+model_name_or_path = "andromeda01111/Malayalam_SA"
 config = AutoConfig.from_pretrained(model_name_or_path)
 feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
 sampling_rate = feature_extractor.sampling_rate
-model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path).to(device)
-
+model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path)
 
 def speech_file_to_array_fn(path, sampling_rate):
     speech_array, _sampling_rate = torchaudio.load(path)
@@ -26,31 +24,38 @@ def speech_file_to_array_fn(path, sampling_rate):
     speech = resampler(speech_array).squeeze().numpy()
     return speech
 
-
 def predict(path, sampling_rate):
     speech = speech_file_to_array_fn(path, sampling_rate)
     features = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
 
-    input_values = features.input_values.to(device)
-    attention_mask = features.attention_mask.to(device)
+    input_values = features.input_values
+    attention_mask = features.attention_mask
 
     with torch.no_grad():
         logits = model(input_values, attention_mask=attention_mask).logits
 
     scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
     outputs = [{"Emotion": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"} for i, score in enumerate(scores)]
+
     return outputs
 
+# Wrapper function for Gradio
+def gradio_predict(audio):
+    predictions = predict(audio, sampling_rate)
+    return [f"{pred['Emotion']}: {pred['Score']}" for pred in predictions]
+
+# Gradio interface
+emotions = [config.id2label[i] for i in range(len(config.id2label))]
+outputs = [gr.Textbox(label=emotion, interactive=False) for emotion in emotions]
 
-demo = gr.Interface(
-    fn=predict,
-    inputs=gr.Audio(source="upload", type="filepath"),
-    outputs="text"
-    title="Sentiment anlysis",
-    # description=description,
-    # examples=examples,
-    # allow_flagging="never"
-)
+interface = gr.Interface(
+    fn=gradio_predict,
+    inputs=gr.Audio(source="upload", type="filepath", label="Upload Audio"),
+    outputs=outputs,
+    title="Emotion Recognition",
+    description="Upload an audio file to predict emotions and their corresponding percentages.",
+)
 
+# Launch the app
 if __name__ == "__main__":
-    demo.launch()
+    interface.launch()
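For a quick sanity check of the updated prediction path outside the Gradio UI, a minimal smoke-test sketch follows. It is not part of the commit: it assumes it runs next to the updated app.py with the Space's dependencies installed, and "sample.wav" is a hypothetical placeholder for any local audio file.

# Hypothetical smoke test for the updated app.py (not part of this commit).
# Importing app downloads the model from the Hub but does not launch the UI,
# because interface.launch() is guarded by the __main__ check.
from app import predict, sampling_rate

for pred in predict("sample.wav", sampling_rate):  # placeholder audio path
    print(f"{pred['Emotion']}: {pred['Score']}")

One caveat: gr.Audio(source="upload", ...) is the Gradio 3.x signature; Gradio 4.x renamed the parameter to sources and made it a list (sources=["upload"]), so this call works unchanged only with a 3.x pin.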