kazalbrur commited on
Commit
8d2760c
1 Parent(s): 46f1eaa

initial commit

Browse files
Files changed (4) hide show
  1. README.md +5 -5
  2. app.py +51 -0
  3. gitattributes +35 -0
  4. requirements.txt +7 -0
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Bangla English Translation BanglaT5
3
- emoji: 🚀
4
- colorFrom: purple
5
- colorTo: pink
6
  sdk: streamlit
7
- sdk_version: 1.37.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
1
  ---
2
+ title: BanglishToBanglaTranslation
3
+ emoji: 🏢
4
+ colorFrom: gray
5
+ colorTo: purple
6
  sdk: streamlit
7
+ sdk_version: 1.37.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Streamlit app: translate Bangla/Banglish input to English with BanglaT5.
import streamlit as st
import numpy as np  # NOTE(review): unused in this file — confirm before removing
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from normalizer import normalize  # csebuetnlp text normalizer required by BanglaT5

# Set the page configuration (must be the first Streamlit call).
st.set_page_config(
    page_title="Bengali to English Translator App",  # browser-tab title
    page_icon=":shield:",                            # browser-tab icon (emoji shortcode)
    initial_sidebar_state="auto",                    # "auto" | "expanded" | "collapsed"
)

# Load custom CSS styling. A missing stylesheet should degrade gracefully to
# default Streamlit styling rather than crash the whole app.
try:
    with open("assets/style.css") as f:
        st.markdown("<style>{}</style>".format(f.read()), unsafe_allow_html=True)
except FileNotFoundError:
    pass  # best effort: run without custom styling


# Cache the model/tokenizer once per process. st.cache_resource (not
# st.cache_data) is the correct cache for unpicklable global resources such as
# ML models; without it they would be re-instantiated on every rerun
# (i.e. on every widget interaction).
@st.cache_resource
def get_model():
    """Return (tokenizer, model) for the BanglaT5 translation checkpoint."""
    tokenizer = AutoTokenizer.from_pretrained(
        "kazalbrur/BanglaEnglishTokenizerBanglaT5", use_fast=True
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "kazalbrur/BanglaEnglishTranslationBanglaT5"
    )
    return tokenizer, model


# Load the tokenizer and model (cached after the first run).
tokenizer, model = get_model()

# Page header.
st.header("Bengali to English Translator")

# Styled prompt above the input box.
st.markdown("<span style='color:black'>Enter your Banglish text here</span>", unsafe_allow_html=True)

# Text area for user input; label hidden because the styled prompt above serves as it.
user_input = st.text_area("Enter your Banglish text here", "", height=250, label_visibility="collapsed")

# Button for submitting the input.
submit_button = st.button("Translate")

# Run translation only when there is non-empty input and the button was clicked.
if user_input and submit_button:
    input_ids = tokenizer(
        normalize(user_input),  # normalize before tokenizing, as BanglaT5 expects
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).input_ids
    # max_new_tokens bounds the generated length independently of input length.
    generated_tokens = model.generate(input_ids, max_new_tokens=128)
    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    st.write(f"<span style='color:black'>Translation: {translation}</span>", unsafe_allow_html=True)
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ git+https://github.com/csebuetnlp/normalizer
2
+ numpy==1.26.4
3
+ streamlit==1.37.0
4
+ torch
5
+ sentencepiece
6
+ transformers[sentencepiece]==4.38.2