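# NOTE: the next line is page-header residue captured with the file; it is not part of the program.
# kazalbrur's picture / updated / edebf03 verified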
import html

import numpy as np
import streamlit as st
import torch
from normalizer import normalize
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Page-level settings: browser-tab title, tab icon, and initial sidebar state.
_PAGE_SETTINGS = {
    "page_title": "NMT",
    "page_icon": ":robot_face:",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_PAGE_SETTINGS)
# Stylesheet that gives every Streamlit text area a visible border,
# rounded corners, and inner padding.
_TEXT_AREA_CSS = """
<style>
.stTextArea textarea {
border: 2px solid #ccc; /* Customize the border color and width */
border-radius: 5px; /* Optional: rounded corners */
padding: 10px; /* Optional: padding inside the text area */
}
</style>
"""

# unsafe_allow_html is needed so the <style> tag is injected as markup
# rather than shown as text.
st.markdown(_TEXT_AREA_CSS, unsafe_allow_html=True)
# Function to load the pre-trained model with caching
@st.cache_resource
def _load_model():
    """Load the tokenizer and the seq2seq model from their pretrained checkpoints.

    Only this function is cached. Any exception propagates to the caller, so
    a failed attempt never stores a result in the resource cache and the
    next call tries again.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "kazalbrur/BanglaEnglishTranslationBanglaT5"
    )
    return tokenizer, model


def get_model():
    """Return the cached ``(tokenizer, model)`` pair, or ``(None, None)`` on failure.

    The error handling is deliberately kept outside the cached loader: if the
    ``(None, None)`` fallback were returned from inside the cached function,
    that failure value itself would be cached and a single transient loading
    error would be served on every later call.

    Returns:
        ``(tokenizer, model)`` when loading succeeds; ``(None, None)`` after
        showing an error message in the app when it fails.
    """
    try:
        return _load_model()
    except Exception as e:  # UI boundary: report any loading failure to the user
        st.error(f"Error loading model: {str(e)}")
        return None, None
# Load the tokenizer and model; get_model() returns (None, None) on failure.
tokenizer, model = get_model()

# Compare against None explicitly instead of relying on the truthiness of
# the tokenizer/model objects.
if tokenizer is not None and model is not None:
    # Page header; the inline style forces a black font color.
    st.markdown("<h1 style='color:black;'>Translate</h1>", unsafe_allow_html=True)

    # Visible heading for the input section.
    st.subheader("Source Text")

    # The widget keeps a real label for accessibility, but the label is
    # collapsed because the subheader above already names the field.
    user_input = st.text_area(
        "Source Text",
        "",
        height=200,
        max_chars=200,
        label_visibility="collapsed",
    )

    # Button for submitting the input
    submit_button = st.button("Translate")

    # Divider between the input and output sections
    st.markdown("<hr style='border:1px solid #ccc;'>", unsafe_allow_html=True)

    if submit_button and not user_input.strip():
        # Give feedback instead of silently ignoring an empty submission.
        st.warning("Please enter some text to translate.")
    elif submit_button:
        try:
            normalized_input = normalize(user_input)
            input_ids = tokenizer(
                normalized_input,
                padding=True,
                truncation=True,
                max_length=128,
                return_tensors="pt",
            ).input_ids
            generated_tokens = model.generate(input_ids, max_new_tokens=128)
            decoded_tokens = tokenizer.batch_decode(
                generated_tokens, skip_special_tokens=True
            )[0]

            # The translation is derived from user input and is rendered as
            # raw HTML below, so escape it to prevent markup injection.
            safe_translation = html.escape(decoded_tokens)

            # Show the output in a similar box style
            st.subheader("Translation")
            st.markdown(
                f"<div style='background-color: #E8F4FE; padding: 10px; border-radius: 5px;'>{safe_translation}</div>",
                unsafe_allow_html=True,
            )
        except torch.cuda.OutOfMemoryError:
            st.error("Out of memory error! Please try smaller input or refresh the page.")
        except Exception as e:  # UI boundary: surface any other failure to the user
            st.error(f"An error occurred during translation: {str(e)}")
else:
    st.error("Model could not be loaded. Please check the model path and try again.")