Update app.py
app.py
CHANGED
@@ -1,10 +1,44 @@
 import gradio as gr
+import cv2
+import easyocr
+import numpy as np
+import requests
+import os
 import whisper
 from transformers import pipeline
 
+API_KEY = os.getenv("API_KEY")
+
+API_URL = "https://api-inference.huggingface.co/models/dima806/facial_emotions_image_detection"
+headers = {"Authorization": "Bearer " + API_KEY}
+
+reader = easyocr.Reader(['en'], gpu=False)
+
 model = whisper.load_model("base")
 sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
 
+def query(image):
+    image_data = np.array(image, dtype=np.uint8)
+    _, buffer = cv2.imencode('.jpg', image_data)
+    binary_data = buffer.tobytes()
+
+    response = requests.post(API_URL, headers=headers, data=binary_data)
+    return response.json()
+
+def text_extraction(image):
+    global text_content
+    text_content = ''
+    facial_data = query(image)
+    text_ = reader.readtext(image)
+    threshold = 0.25
+    for t_, t in enumerate(text_):
+        bbox, text, score = t
+        text_content = text_content + ' ' + text
+        if score > threshold:
+            cv2.rectangle(image, tuple(map(int, bbox[0])), tuple(map(int, bbox[2])), (0, 255, 0), 5)
+
+    return image, text_content, facial_data
+
 def analyze_sentiment(text):
     results = sentiment_analysis(text)
     sentiment_results = {result['label']: result['score'] for result in results}
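The query() helper added above JPEG-encodes the image with OpenCV and posts the raw bytes to the Hugging Face Inference API endpoint, returning the parsed JSON. Hosted models can take a moment to spin up, so a slightly more defensive variant is sketched below; it reuses the module-level API_URL and headers from app.py, and the function name, retry count, and timeout are assumptions rather than part of this commit.

import time

def query_with_retry(image, retries=3, wait=5):
    # Encode the image the same way query() does above.
    image_data = np.array(image, dtype=np.uint8)
    _, buffer = cv2.imencode('.jpg', image_data)
    binary_data = buffer.tobytes()

    for _ in range(retries):
        response = requests.post(API_URL, headers=headers, data=binary_data, timeout=30)
        if response.status_code == 503:
            # The hosted model may still be loading; wait and retry (assumed behaviour).
            time.sleep(wait)
            continue
        response.raise_for_status()
        return response.json()
    return {"error": "facial emotion model did not respond in time"}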
@@ -54,7 +88,9 @@ def display_sentiment_results(sentiment_results, option):
         sentiment_text += f"{sentiment} {emoji}: {score}\n"
     return sentiment_text
 
-def inference(audio, sentiment_option):
+def inference(image, text, audio, sentiment_option):
+    extracted_image, extracted_text, extracted_facial_data = text_extraction(image)
+
     audio = whisper.load_audio(audio)
     audio = whisper.pad_or_trim(audio)
 
@@ -66,15 +102,20 @@
     options = whisper.DecodingOptions(fp16=False)
     result = whisper.decode(model, mel, options)
 
-    sentiment_results = analyze_sentiment(result.text)
-    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)
+    audio_sentiment_results = analyze_sentiment(result.text)     # Ta - text from audio
+    image_sentiment_results = analyze_sentiment(extracted_text)  # Ti - text from image
+    text_sentiment_results = analyze_sentiment(text)             # T  - user-defined text
+
+    audio_sentiment_output = display_sentiment_results(audio_sentiment_results, sentiment_option)
+    image_sentiment_output = display_sentiment_results(image_sentiment_results, sentiment_option)
+    text_sentiment_output = display_sentiment_results(text_sentiment_results, sentiment_option)
 
-    return lang.upper(), result.text, sentiment_output
+    return extracted_image, extracted_facial_data, extracted_text, image_sentiment_output, text_sentiment_output, lang.upper(), result.text, audio_sentiment_output
 
-title = """<h1 align="center"
+title = """<h1 align="center">Cross Model Machine Learning (Sentiment Analysis)</h1>"""
 image_path = "thmbnail.jpg"
 description = """
-💻 This demo showcases a
+💻 This demo showcases a Cross Model Machine Learning tool for Sentiment Analysis.<br><br>
 <br>
 ⚙️ Components of the tool:<br>
 <br>
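With the new signature, inference() fans sentiment analysis out over three text sources (the Whisper transcription, the OCR text pulled from the image, and the user-supplied text) and returns eight values in the order the Gradio outputs below expect. A minimal way to sanity-check it outside the UI is to call it directly; the sample file names here are placeholders, not assets shipped with this commit:

img = cv2.imread("sample_image.jpg")   # any image containing readable text (placeholder path)

(annotated_img, facial_data, ocr_text,
 ocr_sentiment, user_text_sentiment,
 language, transcription, audio_sentiment) = inference(
    img, "I really enjoyed this demo", "sample_audio.wav", "Sentiment Only")

print(language)          # e.g. "EN"
print(transcription)     # Whisper transcript of the audio file
print(ocr_text)          # text found in the image by EasyOCR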
@@ -121,34 +162,32 @@ with block:
         with gr.Column():
             gr.HTML(description)
 
-        with gr.
+    with gr.Blocks():
         with gr.Row():
-            </div>
-        ''')
+            with gr.Column():
+                image = gr.Image()
+
+                image_output = gr.Image()
+                text_output = gr.Textbox(label="Text Content")
+                text_sentiment = gr.Textbox(label="Text Sentiment")
+                facial_output = gr.JSON(label="Facial Data")
+
+            with gr.Column():
+                gr.Textbox(label="Text Content")
+
+                output_text_sentiment = gr.Textbox(label="Text Sentiment")
+
+            with gr.Column():
+                audio = gr.Audio(label="Input Audio", show_label=False, type="filepath")
+                sentiment_option = gr.Radio(choices=["Sentiment Only", "Sentiment + Score"], label="Select an option")
+
+                lang_str = gr.Textbox(label="Language")
+                text = gr.Textbox(label="Transcription")
+                sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
+
+
+        btn = gr.Button("Transcribe")
+
+        btn.click(inference, inputs=[image, text, audio, sentiment_option], outputs=[image_output, facial_output, text_output, text_sentiment, output_text_sentiment, lang_str, text, sentiment_output])
 
 block.launch()
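As committed, app.py now imports cv2, easyocr, numpy, and requests alongside gradio, whisper, and the transformers pipeline, and it reads the Inference API token from the API_KEY environment variable. This commit only touches app.py, so those extra packages (typically opencv-python for cv2 and openai-whisper for the whisper module, plus torch for the sentiment pipeline) and the API_KEY secret have to be provided separately before block.launch() can run.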