|
import streamlit as st |
|
import numpy as np |
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer |
|
from normalizer import normalize |
|
|
|
|
|
# Page-level configuration; Streamlit requires this to be the first st.* call.
_page_config = {
    "page_title": "Bengali to English Translator App",
    "page_icon": ":shield:",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_page_config)
|
|
|
|
|
with open("assets/style.css") as f: |
|
st.markdown("<style>{}</style>".format(f.read()), unsafe_allow_html=True) |
|
|
|
|
|
@st.cache_resource
def get_model():
    """Load and cache the BanglaT5 Bengali→English tokenizer and model.

    Without caching, Streamlit re-executes the whole script on every widget
    interaction, re-loading the multi-hundred-MB model each time.
    ``st.cache_resource`` keeps a single shared instance for the app's lifetime.
    (Requires Streamlit >= 1.18; on older versions use ``st.experimental_singleton``.)

    Returns:
        tuple: (tokenizer, model) — an ``AutoTokenizer`` and an
        ``AutoModelForSeq2SeqLM`` pulled from the Hugging Face Hub.
    """
    tokenizer = AutoTokenizer.from_pretrained("kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True)
    model = AutoModelForSeq2SeqLM.from_pretrained("kazalbrur/BanglaEnglishTranslationBanglaT5")
    return tokenizer, model
|
|
|
|
|
# Materialize the (tokenizer, model) pair once at script start.
tokenizer, model = get_model()

# Heading and input label are raw HTML so the forced black color applies.
_title_html = "<h1 style='color:black;'>Bengali to English Translator</h1>"
st.markdown(_title_html, unsafe_allow_html=True)

_label_html = "<span style='color:black'>Enter your Bengali text here</span>"
st.markdown(_label_html, unsafe_allow_html=True)

# The widget's own label is collapsed; the styled span above stands in for it.
user_input = st.text_area(
    "Enter your Bengali text here",
    "",
    height=250,
    label_visibility="collapsed",
)

submit_button = st.button("Translate")
|
|
|
|
|
if submit_button:
    if user_input:
        # Normalize the Bengali text (bnlp-style normalizer) before tokenizing;
        # inputs are truncated to 128 tokens, matching the generation budget below.
        input_ids = tokenizer(normalize(user_input), padding=True, truncation=True, max_length=128, return_tensors="pt").input_ids
        generated_tokens = model.generate(input_ids, max_new_tokens=128)
        # batch_decode returns a list; a single input means we want element 0.
        decoded_tokens = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
        st.write(f"<span style='color:black'>English Translation: {decoded_tokens}</span>", unsafe_allow_html=True)
    else:
        # Previously an empty submission was silently ignored; give the user feedback.
        st.warning("Please enter some Bengali text to translate.")
|
|