Spaces:
Runtime error
Runtime error
datascientist22
committed on
Commit
•
51d3578
1
Parent(s):
4e07da5
Upload 2 files
Browse files- app.py +149 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from transformers import MarianMTModel, MarianTokenizer
|
3 |
+
|
4 |
+
# Every model identifier in this app shares the same prefix and differs only
# in the trailing target-language code, so the table below stores just the
# code and the full identifier is assembled once at import time.
_MODEL_PREFIX = "Helsinki-NLP/opus-mt-en-"

# Display name of the target language -> language-code suffix.
# Insertion order is significant: it is the order shown in the language picker.
_LANGUAGE_CODES = {
    "Afrikaans": "af",
    "Amharic": "am",
    "Arabic": "ar",
    "Asturian": "ast",
    "Azerbaijani": "az",
    "Bashkir": "ba",
    "Belarusian": "be",
    "Bulgarian": "bg",
    "Bengali": "bn",
    "Breton": "br",
    "Bosnian": "bs",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "Czech": "cs",
    "Welsh": "cy",
    "Danish": "da",
    "German": "de",
    "Greek": "el",
    "English": "en",
    "Spanish": "es",
    "Estonian": "et",
    "Persian": "fa",
    "Fulah": "ff",
    "Finnish": "fi",
    "French": "fr",
    "Western Frisian": "fy",
    "Irish": "ga",
    "Scottish Gaelic": "gd",
    "Galician": "gl",
    "Gujarati": "gu",
    "Hausa": "ha",
    "Hebrew": "he",
    "Hindi": "hi",
    "Croatian": "hr",
    "Haitian Creole": "ht",
    "Hungarian": "hu",
    "Armenian": "hy",
    "Indonesian": "id",
    "Igbo": "ig",
    "Iloko": "ilo",
    "Icelandic": "is",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Georgian": "ka",
    "Kazakh": "kk",
    "Central Khmer": "km",
    "Kannada": "kn",
    "Korean": "ko",
    "Luxembourgish": "lb",
    "Ganda": "lg",
    "Lingala": "ln",
    "Lao": "lo",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Malagasy": "mg",
    "Macedonian": "mk",
    "Malayalam": "ml",
    "Mongolian": "mn",
    "Marathi": "mr",
    "Malay": "ms",
    "Burmese": "my",
    "Nepali": "ne",
    "Dutch": "nl",
    "Norwegian": "no",
    "Northern Sotho": "ns",
    "Occitan": "oc",
    "Oriya": "or",
    "Panjabi": "pa",
    "Polish": "pl",
    "Pushto": "ps",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Sindhi": "sd",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Albanian": "sq",
    "Serbian": "sr",
    "Swati": "ss",
    "Sundanese": "su",
    "Swedish": "sv",
    "Swahili": "sw",
    "Tamil": "ta",
    "Thai": "th",
    "Tagalog": "tl",
    "Tswana": "tn",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Wolof": "wo",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Chinese": "zh",
    "Zulu": "zu",
}

# Language display name -> full model identifier.
models = {
    language: _MODEL_PREFIX + code
    for language, code in _LANGUAGE_CODES.items()
}
|
107 |
+
|
108 |
+
def load_model(language):
    """Load the translation model and tokenizer for a target language.

    Args:
        language: Display name of the target language. It is looked up in
            the module-level ``models`` mapping to find the model identifier.

    Returns:
        A ``(model, tokenizer)`` tuple on success. ``(None, None)`` when the
        language has no entry in ``models`` or when the checkpoint cannot be
        loaded; in both failure cases a message is shown with ``st.error``.
    """
    model_name = models.get(language)
    if not model_name:
        st.error(f"Model for {language} not found.")
        return None, None

    try:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
    except OSError as exc:
        # from_pretrained raises OSError when the identifier cannot be
        # resolved or its files cannot be fetched. Report it through the same
        # failure path as an unknown language instead of crashing the page.
        st.error(f"Could not load model '{model_name}' for {language}: {exc}")
        return None, None

    return model, tokenizer
|
118 |
+
|
119 |
+
def translate_text(text, model, tokenizer):
    """Translate text using the provided model and tokenizer.

    Args:
        text: Source text to translate.
        model: Object exposing ``generate(input_ids)``.
        tokenizer: Object exposing ``encode(text, ...)`` and
            ``decode(ids, skip_special_tokens=True)``.

    Returns:
        The decoded translation as a string. An empty string is returned for
        empty or whitespace-only input without calling the model.

    Note:
        The input is encoded with ``truncation=True``, so text longer than
        the tokenizer's maximum length is cut off rather than passed to the
        model at full length.
    """
    # Nothing to translate: skip tokenization and generation entirely.
    if not text or not text.strip():
        return ""

    # truncation=True keeps the encoded sequence within the tokenizer's
    # configured maximum so over-long input cannot overflow the model.
    inputs = tokenizer.encode(text, return_tensors="pt", truncation=True)
    translated = model.generate(inputs)
    # generate returns a batch; a single input was encoded, so take item 0.
    return tokenizer.decode(translated[0], skip_special_tokens=True)
|
125 |
+
|
126 |
+
def main():
    """Render the translator page and handle one translation request.

    Draws the title, author credit, target-language selector and input text
    area. When the "Translate" button is pressed, the text is checked, the
    model for the selected language is loaded with ``load_model``, and the
    result of ``translate_text`` is written to the page.
    """
    st.title("🌐 Multilingual Translator")
    st.markdown("Created by: [**Engr. Hamesh Raj**](https://www.linkedin.com/in/datascientisthameshraj/)")

    # Target language selection
    target_language = st.selectbox("Select target language:", list(models))

    # Input text area
    text_to_translate = st.text_area("Enter text in English:")

    if not st.button("Translate"):
        return

    # Treat whitespace-only input the same as no input.
    if not text_to_translate or not text_to_translate.strip():
        st.warning("Please enter text to translate.")
        return

    # Load the model based on target language
    model, tokenizer = load_model(target_language)

    # Compare against None explicitly instead of relying on the truthiness of
    # the model/tokenizer objects. load_model reports its own error via
    # st.error when it returns (None, None), so nothing more is shown here.
    if model is None or tokenizer is None:
        return

    translated_text = translate_text(text_to_translate, model, tokenizer)
    st.write(f"**Translation in {target_language}:**")
    st.write(translated_text)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
transformers
|
3 |
+
torch
|
4 |
+
sentencepiece
|