Spaces:
Runtime error
Runtime error
File size: 1,883 Bytes
c768111 4800fde c7a9e39 4800fde c768111 c7a9e39 c768111 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import gradio as gr
import torch
import soundfile as sf
import librosa
# Pretrained checkpoint shared by the acoustic model and its processor.
model_name = "OthmaneJ/distil-wav2vec2"

# CTC model that produces the logits, and the processor that prepares the raw
# waveform for it and decodes predicted ids back to text.
model = Wav2Vec2ForCTC.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)

# Optional dynamic int8 quantization (currently disabled):
# model.eval()
# model_int8 = torch.quantization.quantize_dynamic(model, dtype=torch.qint8, inplace=True)
# define function to read in sound file
# def map_to_array(file):
# speech, sample_rate = sf.read(file)
# return speech, sample_rate
# tokenize
def inference(audio):
    """Transcribe an uploaded audio file with the loaded wav2vec 2.0 model.

    Args:
        audio: The value supplied by the Gradio audio input. Either a
            file-like object whose ``name`` attribute is the path of the
            file on disk, or the path itself as a string. ``None`` (no
            file provided) is accepted.

    Returns:
        The decoded transcription as a string, or an empty string when
        no audio was provided.
    """
    if audio is None:
        # Nothing was uploaded; there is nothing to transcribe.
        return ""

    # Accept both a file wrapper (with ``.name``) and a bare path string.
    audio_path = getattr(audio, "name", audio)

    # Load the waveform at 16 kHz, the same rate declared to the processor.
    sampling_rate = 16_000
    speech, _ = librosa.load(audio_path, sr=sampling_rate)

    # Turn the waveform into a batch of size 1 of model input values.
    input_values = processor(
        speech,
        sampling_rate=sampling_rate,
        return_tensors="pt",
        padding="longest",
    ).input_values

    # Gradients are never needed here; disabling autograd avoids building
    # the graph and keeps memory usage down during the forward pass.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy decoding: pick the most likely token per frame, then decode.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]
# Input/output widgets of the demo: an uploaded audio file in, plain text out.
# NOTE(review): `gr.inputs` / `gr.outputs` are legacy Gradio namespaces —
# confirm the Gradio version used for deployment still provides them.
inputs = gr.inputs.Audio(type="file", label="Input Audio")
outputs = gr.outputs.Textbox(label="Output Text")

# Text shown on the demo page.
title = "distilled wav2vec 2.0"
description = (
    "Gradio demo for a distilled wav2vec 2.0 (4x faster than large wav2vec 2.0, "
    "and 16x times smaller than base wav2vec 2.0 if combined with quantization). "
    "To use it, simply upload your audio, or click one of the examples to load them. "
    "Read more at the links below. Currently supports .wav and .flac files"
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://github.com/OthmaneJ/distil-wav2vec2' target='_blank'> Github repo for demonstration </a>"
    " | "
    "<a href='https://huggingface.co/OthmaneJ/distil-wav2vec2' target='_blank'>Pretrained model</a>"
    "</p>"
)

# One example row; each row holds one value per input component.
examples = [["poem.wav"]]

# Build the interface around `inference` and start serving it.
demo = gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
)
demo.launch()