File size: 2,135 Bytes
8d2760c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# Import necessary libraries
import html

import numpy as np
import streamlit as st
from normalizer import normalize
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Set the page configuration
st.set_page_config(
    page_title="Bengali to English Translator App",  # Text shown in the browser tab
    page_icon=":shield:",  # Emoji shortcode used as the browser tab icon
    initial_sidebar_state="auto",  # One of "auto", "expanded", or "collapsed"
)

# Load custom CSS styling.  The path is relative to the directory the app is
# launched from.  The encoding is given explicitly so the stylesheet is decoded
# the same way on every platform instead of with the locale's default codec.
with open("assets/style.css", encoding="utf-8") as css_file:
    st.markdown(f"<style>{css_file.read()}</style>", unsafe_allow_html=True)
# Function to load the pre-trained model.
# Streamlit re-executes this script on every widget interaction, so the loader
# is cached as a resource: the tokenizer and model are created on the first
# call and the same objects are handed back on later reruns.
@st.cache_resource
def get_model():
    """Load the translation tokenizer and seq2seq model.

    Returns:
        A ``(tokenizer, model)`` tuple: the tokenizer loaded from
        ``kazalbrur/BanglaEnglishTokenizerBanglaT5`` (fast implementation
        requested) and the model loaded from
        ``kazalbrur/BanglaEnglishTranslationBanglaT5``.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "kazalbrur/BanglaEnglishTranslationBanglaT5"
    )
    return tokenizer, model
# Load the tokenizer and model
tokenizer, model = get_model()

# Add a header to the Streamlit app
st.header("Bengali to English Translator")

# Visible prompt rendered in black; the text area's own label is collapsed
# below, so this span is the only caption the user sees.
st.markdown("<span style='color:black'>Enter your Banglish text here</span>", unsafe_allow_html=True)

# Text area for user input with label and height set to 250
user_input = st.text_area("Enter your Banglish text here", "", height=250, label_visibility="collapsed")

# Button for submitting the input
submit_button = st.button("Translate")

# Translate only when the button was clicked and the input contains something
# other than whitespace, so a blank submission does not run the model.
if submit_button and user_input.strip():
    # Inputs longer than 128 tokens are truncated by the tokenizer.
    input_ids = tokenizer(
        normalize(user_input),
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).input_ids
    # max_new_tokens caps the length of the generated translation.
    generated_tokens = model.generate(input_ids, max_new_tokens=128)
    decoded_tokens = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    # The result is embedded in raw HTML, so escape it first: any "<", ">" or
    # "&" produced by the model must be shown as text, not interpreted as markup.
    st.write(
        f"<span style='color:black'>English Translation: {html.escape(decoded_tokens)}</span>",
        unsafe_allow_html=True,
    )
|