abhicodes committed on
Commit b09dde9 · verified · 1 Parent(s): 1940014

Update app.py

Files changed (1)
  app.py  +73 -34
app.py CHANGED
@@ -1,10 +1,44 @@
import gradio as gr
+ import cv2
+ import easyocr
+ import numpy as np
+ import requests
+ import os
import whisper
from transformers import pipeline

+ API_KEY = os.getenv("API_KEY")  # Hugging Face Inference API token, read from the environment
+
+ API_URL = "https://api-inference.huggingface.co/models/dima806/facial_emotions_image_detection"
+ headers = {"Authorization": "Bearer " + API_KEY}
+
+ reader = easyocr.Reader(['en'], gpu=False)  # EasyOCR reader for English text
+
model = whisper.load_model("base")
sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")

+ def query(image):
+     image_data = np.array(image, dtype=np.uint8)
+     _, buffer = cv2.imencode('.jpg', image_data)  # encode the frame as JPEG bytes
+     binary_data = buffer.tobytes()
+
+     response = requests.post(API_URL, headers=headers, data=binary_data)  # hosted facial-emotion model
+     return response.json()
+
+ def text_extraction(image):
+     global text_content
+     text_content = ''
+     facial_data = query(image)  # facial-emotion prediction for the whole image
+     text_ = reader.readtext(image)  # EasyOCR: list of (bbox, text, confidence) tuples
+     threshold = 0.25
+     for t_, t in enumerate(text_):
+         bbox, text, score = t
+         text_content = text_content + ' ' + text
+         if score > threshold:
+             cv2.rectangle(image, tuple(map(int, bbox[0])), tuple(map(int, bbox[2])), (0, 255, 0), 5)  # box the detected text
+
+     return image, text_content, facial_data
+
def analyze_sentiment(text):
    results = sentiment_analysis(text)
    sentiment_results = {result['label']: result['score'] for result in results}
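For reference, the new query() helper posts raw JPEG bytes to the hosted dima806/facial_emotions_image_detection endpoint, and text_extraction() runs EasyOCR over the same frame and boxes any text it finds. A minimal sketch of exercising both outside Gradio, assuming the definitions above are in scope, API_KEY is set in the environment, and a local test image sample.jpg exists (the file name is hypothetical):

    import cv2

    # cv2.imread returns a NumPy uint8 array; Gradio's Image component hands over a similar array at runtime.
    frame = cv2.imread("sample.jpg")

    annotated, ocr_text, emotions = text_extraction(frame)
    print(ocr_text)   # all text EasyOCR detected, concatenated
    print(emotions)   # JSON response from the hosted facial-emotion model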
@@ -54,7 +88,9 @@ def display_sentiment_results(sentiment_results, option):
        sentiment_text += f"{sentiment} {emoji}: {score}\n"
    return sentiment_text

- def inference(audio, sentiment_option):
+ def inference(image, text, audio, sentiment_option):
+     extracted_image, extracted_text, extracted_facial_data = text_extraction(image)
+
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

@@ -66,15 +102,20 @@ def inference(audio, sentiment_option):
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

-     sentiment_results = analyze_sentiment(result.text)
-     sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)
+     audio_sentiment_results = analyze_sentiment(result.text)  # Ta - text from audio
+     image_sentiment_results = analyze_sentiment(extracted_text)  # Ti - text from image
+     text_sentiment_results = analyze_sentiment(text)  # T - user-defined text
+
+     audio_sentiment_output = display_sentiment_results(audio_sentiment_results, sentiment_option)
+     image_sentiment_output = display_sentiment_results(image_sentiment_results, sentiment_option)
+     text_sentiment_output = display_sentiment_results(text_sentiment_results, sentiment_option)

-     return lang.upper(), result.text, sentiment_output
+     return extracted_image, extracted_facial_data, extracted_text, image_sentiment_output, text_sentiment_output, lang.upper(), result.text, audio_sentiment_output

- title = """<h1 align="center">🎤 Multilingual ASR 💬</h1>"""
+ title = """<h1 align="center">Cross Model Machine Learning (Sentiment Analysis)</h1>"""
image_path = "thmbnail.jpg"
description = """
- 💻 This demo showcases a general-purpose speech recognition model called Whisper. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
+ 💻 This demo showcases Cross Model Machine Learning for Sentiment Analysis.<br><br>
<br>
⚙️ Components of the tool:<br>
<br>
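The three analyze_sentiment() calls above reuse the same go_emotions pipeline, which returns a list of {'label', 'score'} dicts that analyze_sentiment() flattens into a label-to-score mapping. A minimal sketch of that pattern on the three text sources, assuming sentiment_analysis from the top of the file is in scope and using placeholder strings for the real inputs:

    # Ta / Ti / T placeholders for the transcript, the OCR text, and the user-typed text.
    sources = {
        "audio": "I am really happy with how this turned out",
        "image": "worst experience ever, would not recommend",
        "text": "it was fine, nothing special",
    }

    for name, content in sources.items():
        scores = {r['label']: r['score'] for r in sentiment_analysis(content)}
        print(name, scores)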
@@ -121,34 +162,32 @@ with block:
        with gr.Column():
            gr.HTML(description)

-     with gr.Group():
+     with gr.Blocks():
        with gr.Row():
-             audio = gr.Audio(
-                 label="Input Audio",
-                 show_label=False,
-                 type="filepath"
-             )
-
-             sentiment_option = gr.Radio(
-                 choices=["Sentiment Only", "Sentiment + Score"],
-                 label="Select an option",
-             )
-
-         btn = gr.Button("Transcribe")
-
-         lang_str = gr.Textbox(label="Language")
-
-         text = gr.Textbox(label="Transcription")
-
-         sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
-
-         btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])
-
-         gr.HTML('''
-         <div class="footer">
-             <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
-             </p>
-         </div>
-         ''')
+             with gr.Column():
+                 image = gr.Image()
+
+                 image_output = gr.Image()
+                 text_output = gr.Textbox(label="Text Content")
+                 text_sentiment = gr.Textbox(label="Text Sentiment")
+                 facial_output = gr.JSON(label="Facial Data")
+
+             with gr.Column():
+                 gr.Textbox(label="Text Content")
+
+                 output_text_sentiment = gr.Textbox(label="Text Sentiment")
+
+             with gr.Column():
+                 audio = gr.Audio(label="Input Audio", show_label=False, type="filepath")
+                 sentiment_option = gr.Radio(choices=["Sentiment Only", "Sentiment + Score"], label="Select an option")
+
+                 lang_str = gr.Textbox(label="Language")
+                 text = gr.Textbox(label="Transcription")
+                 sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
+
+
+         btn = gr.Button("Transcribe")
+
+         btn.click(inference, inputs=[image, text, audio, sentiment_option], outputs=[image_output, facial_output, text_output, text_sentiment, output_text_sentiment, lang_str, text, sentiment_output])

block.launch()
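One detail worth keeping in mind when reading the wiring above: btn.click maps the values returned by inference() onto the outputs list purely by position, so the two must stay in the same order. A sketch of that mapping as it stands after this commit (component names are the ones defined above; lang is the language code Whisper detects earlier in inference()):

    # inference() return value   ->  output component
    # extracted_image            ->  image_output            (image with OCR boxes drawn)
    # extracted_facial_data      ->  facial_output            (JSON from the emotion model)
    # extracted_text             ->  text_output              (concatenated OCR text)
    # image_sentiment_output     ->  text_sentiment
    # text_sentiment_output      ->  output_text_sentiment
    # lang.upper()               ->  lang_str
    # result.text                ->  text                     (Transcription box)
    # audio_sentiment_output     ->  sentiment_output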
 