storresbusquets committed on
Commit 3f534ce • 1 Parent(s): 86a552a

Update app.py

Files changed (1)
  1. app.py +143 -0
app.py CHANGED
 
@@ -303,6 +303,111 @@ class GradioInference:
             wordcloud_image,
         )
 
+
+    def from_article(self, lang, size, article, progress=gr.Progress()):
+        """
+        Entry point of the GradioInference class for the "From Article" tab.
+        Instead of transcribing a video, it processes the pasted article text directly to obtain:
+        - Summary: using Facebook's BART transformer (English) or an mT5 model (other languages).
+        - Keywords: using the VoiceLabT5 keyword extractor.
+        - Sentiment analysis: using Hugging Face's default sentiment classifier.
+        - WordCloud: using the wordcloud Python library.
+        """
+        progress(0, desc="Starting analysis")
+
+        progress(0.30, desc="Summarizing")
+
+        # Perform summarization on the article text
+        transcription_summary = self.bart_summarizer(
+            article, max_length=150, min_length=30, do_sample=False, truncation=True
+        )
+
+        #### Multilingual summary
+        WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
+
+        input_ids_sum = self.mt5_tokenizer(
+            [WHITESPACE_HANDLER(article)],
+            return_tensors="pt",
+            padding="max_length",
+            truncation=True,
+            max_length=512
+        )["input_ids"]
+
+        output_ids_sum = self.mt5_model.generate(
+            input_ids=input_ids_sum,
+            max_length=130,
+            no_repeat_ngram_size=2,
+            num_beams=4
+        )[0]
+
+        summary = self.mt5_tokenizer.decode(
+            output_ids_sum,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
+        #### End of multilingual summary
+
+        progress(0.60, desc="Extracting Keywords")
+
+        # Extract keywords using VoiceLabT5
+        task_prefix = "Keywords: "
+        input_sequence = task_prefix + article
+
+        input_ids = self.keyword_tokenizer(
+            input_sequence,
+            return_tensors="pt",
+            truncation=False
+        ).input_ids
+
+        output = self.keyword_model.generate(
+            input_ids,
+            no_repeat_ngram_size=3,
+            num_beams=4
+        )
+        predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
+        keywords = [x.strip() for x in predicted.split(",") if x.strip()]
+        formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
+
+        progress(0.80, desc="Extracting Sentiment")
+
+        # Map sentiment labels to emojis
+        sentiment_emojis = {
+            "positive": "Positive 👍🏼",
+            "negative": "Negative 👎🏼",
+            "neutral": "Neutral 😶",
+        }
+
+        # Sentiment label
+        label = self.classifier(summary)[0]["label"]
+
+        # Format the label with its emoji
+        formatted_sentiment = sentiment_emojis.get(label, label)
+
+        progress(0.90, desc="Generating Wordcloud")
+        # WordCloud object
+        wordcloud = WordCloud(colormap="Oranges").generate(
+            article
+        )
+        wordcloud_image = wordcloud.to_image()
+
+        if lang == "english" or lang == "none":
+            return (
+                transcription_summary[0]["summary_text"],
+                formatted_keywords,
+                formatted_sentiment,
+                wordcloud_image,
+            )
+        else:
+            return (
+                summary,
+                formatted_keywords,
+                formatted_sentiment,
+                wordcloud_image,
+            )
+
 
 gio = GradioInference()
 title = "YouTube Insights"
 
@@ -409,6 +514,44 @@ with block as demo:
                         outputs=[text, summary, keywords, label, wordcloud_image],
                     )
 
+        with gr.Tab("From Article 📋"):
+            with gr.Box():
+
+                with gr.Row().style(equal_height=True):
+                    size = gr.Dropdown(
+                        label="Model Size", choices=gio.sizes, value="base"
+                    )
+                    lang = gr.Dropdown(
+                        label="Language (Optional)", choices=gio.langs, value="none"
+                    )
+
+                with gr.Row().style(equal_height=True):
+                    article = gr.Textbox(
+                        label="Transcription",
+                        placeholder="Paste your text...",
+                        lines=10,
+                    ).style(show_copy_button=True, container=False)
+
+                with gr.Row().style(equal_height=True):
+                    summary = gr.Textbox(
+                        label="Summary", placeholder="Summary Output", lines=5
+                    )
+                    keywords = gr.Textbox(
+                        label="Keywords", placeholder="Keywords Output", lines=5
+                    )
+                    label = gr.Label(label="Sentiment Analysis")
+                    wordcloud_image = gr.Image(label="WordCloud")
+
+                with gr.Row().style(equal_height=True):
+                    clear = gr.ClearButton([article, summary, keywords, label, wordcloud_image], scale=1, value="Clear 🗑️")
+                    btn = gr.Button(
+                        "Get text insights 🔎", variant="primary", scale=1
+                    )
+                    btn.click(
+                        gio.from_article,
+                        inputs=[lang, size, article],
+                        outputs=[summary, keywords, label, wordcloud_image],
+                    )
 
 with block:
     gr.Markdown("### Video Examples")