File size: 2,744 Bytes
8d2760c a09a909 8d2760c a09a909 cc6486b 8d2760c a09a909 8d2760c a09a909 8d2760c a09a909 8d2760c a09a909 8d2760c a09a909 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import html

import numpy as np
import streamlit as st
import torch
from normalizer import normalize
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Configure the browser tab title, the tab icon and the initial sidebar state.
st.set_page_config(
    page_title="NMT",
    page_icon=":robot_face:",  # emoji shortcode used as the page icon
    initial_sidebar_state="auto",
)

# Inject the app's stylesheet into the page.
# The encoding is passed explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open("assets/style.css", encoding="utf-8") as css_file:
    stylesheet = css_file.read()
st.markdown(f"<style>{stylesheet}</style>", unsafe_allow_html=True)
@st.cache_resource
def _load_model():
    """Load the tokenizer and the seq2seq model and return them as a pair.

    This is the cached part of model loading. It deliberately does not catch
    exceptions: only a successful return value should end up in the resource
    cache, so a failed attempt is retried on the next call instead of being
    remembered.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "kazalbrur/BanglaEnglishTranslationBanglaT5"
    )
    return tokenizer, model


def get_model():
    """Return ``(tokenizer, model)``, or ``(None, None)`` if loading fails.

    On failure the error is reported in the page with ``st.error``. The
    error handling lives outside the cached loader so that the
    ``(None, None)`` fallback is never stored in the cache.

    Returns:
        A 2-tuple of the tokenizer and the model, or ``(None, None)`` when
        either of them could not be loaded.
    """
    try:
        return _load_model()
    except Exception as exc:  # UI boundary: report any load failure to the user
        st.error(f"Error loading model: {exc}")
        return None, None
# Load the tokenizer and model; both are None when loading failed.
tokenizer, model = get_model()

# Compare against None explicitly rather than relying on the truthiness of
# the tokenizer/model objects.
if tokenizer is not None and model is not None:
    # Page heading with an inline style forcing a black font colour.
    st.markdown("<h1 style='color:black;'>Translate</h1>", unsafe_allow_html=True)

    st.subheader("Source Text")
    # The subheader above is the visible caption, so the widget's own label is
    # collapsed. A non-empty label is still supplied because an empty one is
    # an accessibility problem for screen readers.
    user_input = st.text_area(
        "Source Text",
        "",
        height=200,
        max_chars=200,
        label_visibility="collapsed",
    )
    submit_button = st.button("Translate")

    # Divider between the input and output sections.
    st.markdown("<hr style='border:1px solid #ccc;'>", unsafe_allow_html=True)

    if submit_button:
        if not user_input.strip():
            # Nothing but whitespace: tell the user instead of silently doing
            # nothing or sending blank text to the model.
            st.warning("Please enter some text to translate.")
        else:
            try:
                normalized_input = normalize(user_input)
                # Inputs longer than 128 tokens are truncated by the tokenizer.
                input_ids = tokenizer(
                    normalized_input,
                    padding=True,
                    truncation=True,
                    max_length=128,
                    return_tensors="pt",
                ).input_ids
                generated_tokens = model.generate(input_ids, max_new_tokens=128)
                translation = tokenizer.batch_decode(
                    generated_tokens, skip_special_tokens=True
                )[0]

                st.subheader("Translation")
                # The text is placed inside raw HTML, so it is escaped first:
                # characters such as "<" or "&" in the model output must be
                # shown literally, not interpreted as markup.
                st.markdown(
                    "<div style='background-color: #E8F4FE; padding: 10px; "
                    "border-radius: 5px;'>"
                    f"{html.escape(translation)}</div>",
                    unsafe_allow_html=True,
                )
            except torch.cuda.OutOfMemoryError:
                st.error("Out of memory error! Please try smaller input or refresh the page.")
            except Exception as exc:  # UI boundary: surface any other failure
                st.error(f"An error occurred during translation: {exc}")
else:
    st.error("Model could not be loaded. Please check the model path and try again.")
|