updated
app.py CHANGED
@@ -2,11 +2,12 @@ import streamlit as st
 import numpy as np
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from normalizer import normalize
+import torch
 
 # Set the page configuration
 st.set_page_config(
-    page_title="
-    page_icon=":
+    page_title="NMT",  # Updated title as seen in the image
+    page_icon=":robot_face:",  # Use an emoji as the icon, similar to the robot face in the image
     initial_sidebar_state="auto"
 )
 
@@ -14,30 +15,50 @@ st.set_page_config(
 with open("assets/style.css") as f:
     st.markdown("<style>{}</style>".format(f.read()), unsafe_allow_html=True)
 
-# Function to load the pre-trained model
+# Function to load the pre-trained model with caching
+@st.cache_resource
 def get_model():
-
-
-
+    try:
+        tokenizer = AutoTokenizer.from_pretrained("kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True)
+        model = AutoModelForSeq2SeqLM.from_pretrained("kazalbrur/BanglaEnglishTranslationBanglaT5")
+        return tokenizer, model
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        return None, None
 
 # Load the tokenizer and model
 tokenizer, model = get_model()
 
-
-
+if tokenizer and model:
+    # Add a header with custom CSS for black font color
+    st.markdown("<h1 style='color:black;'>Translate</h1>", unsafe_allow_html=True)
 
-# Add
-st.
+    # Add a subheader for "Source Text"
+    st.subheader("Source Text")
+
+    # Text area for user input with height set to 200
+    user_input = st.text_area("", "", height=200, max_chars=200)  # no label text, to match the image
+
+    # Button for submitting the input
+    submit_button = st.button("Translate")
+
+    # Divider between the input and output sections
+    st.markdown("<hr style='border:1px solid #ccc;'>", unsafe_allow_html=True)
 
-#
-user_input
-
-
-
-
-
-
-
-
-
+    # Perform prediction when user input is provided and the submit button is clicked
+    if user_input and submit_button:
+        try:
+            normalized_input = normalize(user_input)
+            input_ids = tokenizer(normalized_input, padding=True, truncation=True, max_length=128, return_tensors="pt").input_ids
+            generated_tokens = model.generate(input_ids, max_new_tokens=128)
+            decoded_tokens = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+
+            # Show the output in a similar box style
+            st.subheader("Translation")
+            st.markdown(f"<div style='background-color: #E8F4FE; padding: 10px; border-radius: 5px;'>{decoded_tokens}</div>", unsafe_allow_html=True)
+        except torch.cuda.OutOfMemoryError:
+            st.error("Out of memory error! Please try smaller input or refresh the page.")
+        except Exception as e:
+            st.error(f"An error occurred during translation: {str(e)}")
+else:
+    st.error("Model could not be loaded. Please check the model path and try again.")
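For reference, the inference path this commit wires into Streamlit can be exercised on its own. Below is a minimal standalone sketch, assuming the two Hub checkpoints named in the diff are publicly downloadable and that normalize comes from the csebuetnlp normalizer package (as the app.py import suggests); the sample sentence is only an illustration:

# Standalone sketch of the same normalize -> tokenize -> generate -> decode path.
# Assumes: pip install transformers torch normalizer
from normalizer import normalize  # assumed to be the csebuetnlp normalizer app.py imports
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Same checkpoints the diff loads inside get_model()
tokenizer = AutoTokenizer.from_pretrained("kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True)
model = AutoModelForSeq2SeqLM.from_pretrained("kazalbrur/BanglaEnglishTranslationBanglaT5")

text = "আমি বাংলায় গান গাই।"  # hypothetical Bangla input for illustration
input_ids = tokenizer(normalize(text), padding=True, truncation=True,
                      max_length=128, return_tensors="pt").input_ids
generated = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])

The @st.cache_resource decorator the commit adds keeps this load from repeating on every Streamlit rerun, and the torch.cuda.OutOfMemoryError branch in the new error handling requires a reasonably recent PyTorch (the exception class was added around 1.13). The app itself runs with streamlit run app.py.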