Spaces:

kazalbrur
/

Bengali-English-Translation

Sleeping

File size: 2,744 Bytes

import html

import numpy as np
import streamlit as st
import torch
from normalizer import normalize
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Configure the browser tab (title and icon) and the initial sidebar state.
# Streamlit expects this to run before other Streamlit commands in the script.
st.set_page_config(
    page_title="NMT",
    page_icon=":robot_face:",  # emoji shortcode rendered as the tab icon
    initial_sidebar_state="auto"
)

# Inject the project's custom stylesheet into the page. The encoding is given
# explicitly so the CSS is decoded the same way on every platform instead of
# relying on the locale's default encoding.
with open("assets/style.css", encoding="utf-8") as f:
    st.markdown("<style>{}</style>".format(f.read()), unsafe_allow_html=True)

# Cached loader for the pre-trained tokenizer and model.
@st.cache_resource
def _load_translation_resources():
    """Load the tokenizer and the seq2seq model from their pretrained checkpoints.

    Exceptions are deliberately NOT caught here: st.cache_resource only stores
    a value when the function returns, so letting a failure propagate keeps a
    transient error (e.g. a failed download) from being cached as a permanent
    "no model" result.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained("kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True)
    model = AutoModelForSeq2SeqLM.from_pretrained("kazalbrur/BanglaEnglishTranslationBanglaT5")
    return tokenizer, model


def get_model():
    """Return the cached ``(tokenizer, model)`` pair, or ``(None, None)`` on failure.

    On failure the error is reported in the page via ``st.error``. Because the
    failure itself is not cached, the next script rerun retries the load.
    """
    try:
        return _load_translation_resources()
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None, None

# Load the tokenizer and model; get_model() returns (None, None) on failure.
tokenizer, model = get_model()

# Compare against None explicitly: truthiness of these objects depends on
# whether they define __len__/__bool__, which is not what is being asked here.
if tokenizer is not None and model is not None:
    # Page header with an explicit black font colour
    st.markdown("<h1 style='color:black;'>Translate</h1>", unsafe_allow_html=True)

    # Sub-header for the input section
    st.subheader("Source Text")

    # Input box (200 px high, at most 200 characters). The widget keeps a real
    # label for accessibility but hides it, since the sub-header above already
    # names the field visually.
    user_input = st.text_area(
        "Source Text",
        "",
        height=200,
        max_chars=200,
        label_visibility="collapsed",
    )

    # Button that triggers the translation
    submit_button = st.button("Translate")

    # Divider between the input and output sections
    st.markdown("<hr style='border:1px solid #ccc;'>", unsafe_allow_html=True)

    if submit_button:
        if not user_input.strip():
            # Give feedback instead of silently ignoring the click (or sending
            # whitespace-only text to the model).
            st.warning("Please enter some text to translate.")
        else:
            try:
                normalized_input = normalize(user_input)
                # Tokenize to at most 128 tokens and return PyTorch tensors
                input_ids = tokenizer(
                    normalized_input,
                    padding=True,
                    truncation=True,
                    max_length=128,
                    return_tensors="pt",
                ).input_ids
                generated_tokens = model.generate(input_ids, max_new_tokens=128)
                decoded_tokens = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

                # The translation is placed inside raw HTML, so escape it first:
                # model output derived from user text must not be interpreted
                # as markup.
                safe_translation = html.escape(decoded_tokens)

                # Show the output in a light-blue rounded box
                st.subheader("Translation")
                st.markdown(
                    f"<div style='background-color: #E8F4FE; padding: 10px; border-radius: 5px;'>{safe_translation}</div>",
                    unsafe_allow_html=True,
                )
            except torch.cuda.OutOfMemoryError:
                st.error("Out of memory error! Please try smaller input or refresh the page.")
            except Exception as e:
                st.error(f"An error occurred during translation: {str(e)}")
else:
    st.error("Model could not be loaded. Please check the model path and try again.")