# eos_pad_cal/app.py
import io
import sys

import torch
import gradio as gr
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq


def test_eos_pad():
    raw_text_batch = 'a'

    # Capture print statements
    old_stdout = sys.stdout
    new_stdout = io.StringIO()
    sys.stdout = new_stdout

    # Load the processor and model for the NbAiLab Whisper model
    processor = AutoProcessor.from_pretrained("NbAiLab/nb-whisper-large-verbatim")
    model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLab/nb-whisper-large-verbatim")

    # Check whether a pad token is set; if not, fall back to the eos token
    if processor.tokenizer.pad_token_id is None:
        processor.tokenizer.pad_token = processor.tokenizer.eos_token

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print(f'{processor.tokenizer.eos_token=}')
    print(f'{processor.tokenizer.eos_token_id=}')
    print(f'{processor.tokenizer.pad_token=}')
    print(f'{processor.tokenizer.pad_token_id=}')

    # Tokenize the input batch
    tokenize_batch = processor.tokenizer(raw_text_batch, padding="max_length", max_length=5, truncation=True, return_tensors="pt")
    print(f'{tokenize_batch=}')
    print('Done')

    # Restore the original stdout and return the captured output
    sys.stdout = old_stdout
    output = new_stdout.getvalue()
    return output


iface = gr.Interface(
    fn=test_eos_pad,
    inputs=[],
    outputs=gr.Textbox(label="Results"),
    title="Check EOS and PAD Tokens",
    description="This Gradio interface displays the output of the test_eos_pad function."
)

if __name__ == "__main__":
    iface.launch()
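
# Minimal local usage sketch (assumes this file is saved as app.py and that the
# model weights can be downloaded): call test_eos_pad() directly to print the
# captured diagnostics without starting the Gradio server, e.g.
#
#   from app import test_eos_pad
#   print(test_eos_pad())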