|
|
|
import streamlit as st |
|
import numpy as np |
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer |
|
from normalizer import normalize |
|
|
|
|
|
# Configure the browser tab title, favicon, and sidebar behavior.
# NOTE: st.set_page_config must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Bengali to English Translator App",  # fixed typo "Bengalai"
    page_icon=":shield:",
    initial_sidebar_state="auto",
)
|
|
|
|
|
|
|
# Inject the project's custom stylesheet into the rendered page.
# An explicit UTF-8 encoding avoids platform-dependent default decoding
# of the CSS file (e.g. cp1252 on Windows).
with open("assets/style.css", encoding="utf-8") as f:
    st.markdown("<style>{}</style>".format(f.read()), unsafe_allow_html=True)
|
|
|
|
|
|
|
@st.cache_resource  # cache across Streamlit reruns: without this, the model is re-loaded on every widget interaction
def get_model():
    """Load the BanglaT5 tokenizer and seq2seq model from the Hugging Face Hub.

    Returns:
        tuple: ``(tokenizer, model)`` — an ``AutoTokenizer`` and an
        ``AutoModelForSeq2SeqLM`` for Bangla→English translation.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "kazalbrur/BanglaEnglishTranslationBanglaT5"
    )
    return tokenizer, model
|
|
|
|
|
|
|
# Load the tokenizer and model once at script start.
tokenizer, model = get_model()

st.header("Bengali to English Translator")  # fixed typo "Benglai"

# Visible label rendered above the (label-hidden) text area.
# "Bangla" replaces the inconsistent "Banglish": the model translates
# Bangla-script text, matching the app title and the normalizer.
st.markdown("<span style='color:black'>Enter your Bangla text here</span>", unsafe_allow_html=True)

user_input = st.text_area("Enter your Bangla text here", "", height=250, label_visibility="collapsed")

submit_button = st.button("Translate")

# Run inference only when there is input AND the button was clicked.
if user_input and submit_button:
    # Normalize the Bangla text, then tokenize; inputs are truncated to the
    # model's 128-token window.
    input_ids = tokenizer(
        normalize(user_input),
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).input_ids
    generated_tokens = model.generate(input_ids, max_new_tokens=128)
    # Single-item batch: decode and take the first (only) sequence.
    decoded_tokens = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    # Label fixed: the model's output is English, not Bangla.
    st.write(f"<span style='color:black'>English Translation: {decoded_tokens}</span>", unsafe_allow_html=True)
|
|
|
|
|
|
|
|