Spaces:

gabrielblins
/

ASR_NewsClassifier_PTBR

Runtime error

App Files Files Community

ASR_NewsClassifier_PTBR / app.py

Gabriel Lins

Added app file and requirements

fa0a990 10 months ago

raw

history blame contribute delete

No virus

2.12 kB

	import gradio as gr
	import torch
	import numpy as np
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

	# Checkpoint used for text classification (tokenizer + sequence-classification head).
	model_id = 'ClaudianoLeonardo/bert-finetuned_news_classifier-portuguese'
	tokenizer_classifier = AutoTokenizer.from_pretrained(model_id)
	model_classifier = AutoModelForSequenceClassification.from_pretrained(model_id)

	# Checkpoint used for automatic speech recognition.
	model_id2 = "Stopwolf/distil-whisper-large-v2-pt"

	# Load the speech-recognition pipeline from the Hugging Face Hub
	whisper_model = pipeline('automatic-speech-recognition', model=model_id2)

	# Reuse the classifier and tokenizer loaded above instead of calling
	# from_pretrained() a second time on the same checkpoint: a second load
	# keeps a duplicate copy of the weights in memory and slows start-up.
	# Both pairs of names are kept so existing references keep working.
	text_classification_model = model_classifier
	text_classification_tokenizer = tokenizer_classifier

	# Maps a class index in the classifier output to its label string.
	id2label = {0: 'economia', 1: 'esportes', 2: 'famosos', 3: 'politica', 4: 'tecnologia'}

	def get_text(logits):
	    """Return the label of the highest-scoring class in ``logits``.

	    The previous implementation thresholded sigmoid probabilities at 0.5
	    and returned the first label that passed. That raised ``IndexError``
	    when no class reached the threshold and, when several did, returned
	    the lowest-indexed one rather than the strongest. Sigmoid is
	    monotonic, so taking the argmax of the raw logits selects the same
	    class as the argmax of the probabilities and always yields one label.

	    Args:
	        logits: A torch tensor holding one score per class; size-1
	            dimensions (such as a batch dimension of 1) are squeezed away.

	    Returns:
	        The ``id2label`` entry for the index with the largest score.
	    """
	    # detach() so tensors produced with autograd enabled are accepted too.
	    scores = logits.detach().squeeze().cpu()
	    best_index = int(torch.argmax(scores))
	    return id2label[best_index]

	def inference(audio):
	    """Transcribe an audio clip and classify the transcribed text.

	    Args:
	        audio: A ``(sampling_rate, samples)`` pair. ``samples`` is
	            converted to a float32 NumPy array. Anything that cannot be
	            unpacked or converted this way (for example ``None``) is
	            treated as invalid input.

	    Returns:
	        A string containing the transcription and the predicted class, or
	        a fixed error message when the audio is missing, malformed or empty.
	    """
	    error_message = "Erro ao carregar o áudio ou insira um áudio válido"

	    # Only unpacking/conversion failures mean "bad audio"; other errors
	    # should surface instead of being swallowed by a bare except.
	    try:
	        sr, y = audio
	        samples = np.asarray(y, dtype=np.float32)
	    except (TypeError, ValueError):
	        return error_message

	    # A scalar or empty buffer carries no audio; np.max would raise on the
	    # empty case, so report it as invalid input instead.
	    if samples.ndim == 0 or samples.size == 0:
	        return error_message

	    # Down-mix multi-channel audio to mono.
	    # NOTE(review): assumes a (samples, channels) layout, as in Gradio's
	    # numpy audio format -- confirm if another source feeds this function.
	    if samples.ndim > 1:
	        samples = samples.mean(axis=1)

	    # Peak-normalise, skipping silent clips to avoid dividing by zero
	    # (which would fill the buffer with NaN).
	    peak = np.max(np.abs(samples))
	    if peak > 0:
	        samples = samples / peak

	    # Run speech recognition and take the transcribed text
	    transcribed_text = whisper_model({"sampling_rate": sr, "raw": samples})["text"]

	    # Run the text-classification model on the transcription; gradients
	    # are not needed for inference.
	    text_input = text_classification_tokenizer(transcribed_text, return_tensors="pt", padding=True)
	    with torch.no_grad():
	        text_output = text_classification_model(**text_input)

	    # Map the logits to the predicted class label
	    predicted_class = get_text(text_output["logits"])

	    return f"Texto transcrito: {transcribed_text}\nClasse predita: {predicted_class}"

	# Build the Gradio UI: an audio input wired to inference(), with the
	# result rendered as text. live=True is passed through to gr.Interface.
	iface = gr.Interface(inference, gr.Audio(), "text", live=True)

	# Start the app
	iface.launch(debug=True)