Gundeep Singh committed on
Commit
860d7e4
·
1 Parent(s): ebee301

Update auto detect label on language detection

Browse files
Files changed (7) hide show
  1. .gitignore +1 -1
  2. app.py +52 -14
  3. examples.py +14 -0
  4. iso639_wrapper.py +22 -0
  5. language_directions.py +19 -18
  6. project-notes.md +3 -1
  7. utils.py +17 -1
.gitignore CHANGED
@@ -1 +1 @@
1
- *pycache*
 
1
+ *pycache*
app.py CHANGED
@@ -1,15 +1,27 @@
 
1
  import gradio as gr
2
  from language_directions import *
3
  from transformers import pipeline
 
4
 
5
  source_lang_dict = get_all_source_languages()
6
  target_lang_dict = {}
7
  source_languages = source_lang_dict.keys()
8
 
9
- def source_dropdown_changed(source_dropdown, input_text=""):
 
 
 
 
 
 
 
 
 
 
10
  global target_lang_dict
11
- target_lang_dict = get_target_languages(source_lang_dict[source_dropdown], input_text)
12
- target_languages = target_lang_dict.keys()
13
  default_target_value = None
14
  if "English" in target_languages or "english" in target_languages:
15
  default_target_value = "English"
@@ -19,16 +31,41 @@ def source_dropdown_changed(source_dropdown, input_text=""):
19
  value=default_target_value,
20
  label="Target Language")
21
  return target_dropdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def translate(input_text, source, target):
24
- if source == "Auto Detect":
 
25
  source, _ = auto_detect_language_code(input_text)
26
- target_lang_dict = get_target_languages(source)
27
- target = target_lang_dict[target]
28
- model = f"Helsinki-NLP/opus-mt-{source}-{target}"
29
- pipe = pipeline("translation", model=model)
30
- translation = pipe(input_text)
31
- return translation[0]['translation_text']
 
 
 
 
 
32
 
33
 
34
  with gr.Blocks() as demo:
@@ -55,14 +92,15 @@ with gr.Blocks() as demo:
55
  value="English",
56
  label="Target Language")
57
  translated_textbox = gr.Textbox(lines=5, placeholder="", label="Translated Text")
 
58
  btn = gr.Button("Translate")
59
- source_language_dropdown.change(source_dropdown_changed, inputs=source_language_dropdown, outputs=target_language_dropdown)
 
60
  btn.click(translate, inputs=[input_textbox,
61
  source_language_dropdown,
62
  target_language_dropdown],
63
- outputs=translated_textbox)
64
- gr.Examples(["Je te rencontre au café", "Répétez s'il vous plaît."],
65
- inputs=[input_textbox])
66
 
67
  if __name__ == "__main__":
68
  demo.launch()
 
1
+ # from responses import start
2
  import gradio as gr
3
  from language_directions import *
4
  from transformers import pipeline
5
+ from examples import example_sentences
6
 
7
  source_lang_dict = get_all_source_languages()
8
  target_lang_dict = {}
9
  source_languages = source_lang_dict.keys()
10
 
11
def get_auto_detect_source_dropdown(input_text):
    """Build a source-language dropdown labelled with the detected language.

    Detects the language of ``input_text``, registers a
    ``"Detected - <name>"`` label in the module-level ``source_lang_dict``
    and returns a dropdown that has this label selected.

    Args:
        input_text: Text whose language should be detected.

    Returns:
        A ``(dropdown, language_name)`` tuple: the new ``gr.Dropdown`` and the
        readable name of the detected language.
    """
    source, _ = auto_detect_language_code(input_text)
    language_name = get_name_from_iso_code(source)
    source_dropdown_text = "Detected - " + language_name
    # Mutates the shared dict so the label becomes a selectable choice.
    update_source_languages_dict(source_lang_dict, source_dropdown_text)
    source_language_dropdown = gr.Dropdown(
        # Snapshot the live keys view into a list, as get_target_dropdown
        # does for its choices, so the component never holds a view that
        # changes underneath it.
        choices=list(source_languages),
        value=source_dropdown_text,
        label="Source Language",
    )
    return source_language_dropdown, language_name
20
+
21
+ def get_target_dropdown(source_language_name, input_text):
22
  global target_lang_dict
23
+ target_lang_dict, source_language = get_target_languages(source_lang_dict[source_language_name], input_text)
24
+ target_languages = list(target_lang_dict.keys())
25
  default_target_value = None
26
  if "English" in target_languages or "english" in target_languages:
27
  default_target_value = "English"
 
31
  value=default_target_value,
32
  label="Target Language")
33
  return target_dropdown
34
+
35
def get_dropdown_value(dropdown):
    """Return the selected value of a dropdown as plain data.

    Args:
        dropdown: The selected value itself (a string), a ``gr.Dropdown``
            component, or ``None``.

    Returns:
        The string unchanged, the ``value`` the component was constructed
        with, or ``None`` when nothing usable was passed.
    """
    # Plain values are handled first; they need no component introspection.
    if dropdown is None or isinstance(dropdown, str):
        return dropdown
    if isinstance(dropdown, gr.Dropdown):
        return dropdown.constructor_args.get('value')
    # Any other type used to fall through to an unbound local variable and
    # raise UnboundLocalError; report "no value" explicitly instead.
    return None
41
+
42
def get_dropdowns(source_dropdown, input_text):
    """Compute the source and target dropdowns for the current UI state.

    Args:
        source_dropdown: Current source selection (value or component).
        input_text: Text currently entered by the user.

    Returns:
        A ``(source_dropdown, target_dropdown)`` tuple. The source dropdown is
        replaced by a "Detected - ..." dropdown when detection ran; otherwise
        it is passed through unchanged.
    """
    # A missing selection (None or "") is treated as a request to auto detect
    # instead of crashing on ``.startswith`` below.
    source_language_name = get_dropdown_value(source_dropdown) or "Auto Detect"

    # Same evaluation order as the original un-parenthesised condition
    # (``and`` binds tighter than ``or``): "Auto Detect" only triggers
    # detection when there is text, while an existing "Detected - ..." label
    # is always refreshed.
    auto_detect_with_text = bool(input_text) and source_language_name == "Auto Detect"
    if auto_detect_with_text or source_language_name.startswith("Detected"):
        source_dropdown, source_language_name = get_auto_detect_source_dropdown(input_text)

    target_dropdown = get_target_dropdown(
        source_language_name=source_language_name,
        input_text=input_text,
    )
    return source_dropdown, target_dropdown
49
+
50
def input_changed(source_language_dropdown, input_text=""):
    """Event handler that recomputes both language dropdowns.

    Args:
        source_language_dropdown: Current source-language selection.
        input_text: Current content of the input textbox.

    Returns:
        Whatever ``get_dropdowns`` returns for these arguments.
    """
    refreshed_dropdowns = get_dropdowns(
        source_dropdown=source_language_dropdown,
        input_text=input_text,
    )
    return refreshed_dropdowns
53
 
54
def translate(input_text, source, target):
    """Translate ``input_text`` with a Helsinki-NLP opus-mt model.

    Args:
        input_text: Text to translate.
        source: Source dropdown value: a readable language name,
            ``"Auto Detect"`` or a ``"Detected - ..."`` label.
        target: Readable name of the target language.

    Returns:
        A ``(translation, message)`` tuple. On success ``message`` is ``""``.
        When the direction is not available, ``translation`` is ``""`` and
        ``message`` carries the error text.
    """
    if not input_text:
        # Nothing to translate: do not load a model for empty input.
        return "", ""

    # Guard against a missing selection (None/"") before calling str methods.
    source = source or "Auto Detect"
    source_readable = source
    if source == "Auto Detect" or source.startswith("Detected"):
        source, _ = auto_detect_language_code(input_text)
    # Readable names map to codes; values that are not keys pass through.
    source = source_lang_dict.get(source, source)

    available_targets, _ = get_target_languages(source)
    try:
        # Kept in its own name so ``target`` stays readable for the message
        # below even if a KeyError is raised after this lookup succeeded.
        target_code = available_targets[target]
        model = f"Helsinki-NLP/opus-mt-{source}-{target_code}"
        pipe = pipeline("translation", model=model)
        translation = pipe(input_text)
        return translation[0]['translation_text'], ""
    except KeyError:
        return "", f"Error: Translation direction {source_readable} to {target} is not supported by Helsinki Translation Models"
69
 
70
 
71
  with gr.Blocks() as demo:
 
92
  value="English",
93
  label="Target Language")
94
  translated_textbox = gr.Textbox(lines=5, placeholder="", label="Translated Text")
95
+ info_label = gr.HTML("")
96
  btn = gr.Button("Translate")
97
+ source_language_dropdown.change(input_changed, inputs=[source_language_dropdown, input_textbox], outputs=[source_language_dropdown, target_language_dropdown])
98
+ input_textbox.change(input_changed, inputs=[source_language_dropdown, input_textbox], outputs=[source_language_dropdown, target_language_dropdown])
99
  btn.click(translate, inputs=[input_textbox,
100
  source_language_dropdown,
101
  target_language_dropdown],
102
+ outputs=[translated_textbox, info_label])
103
+ gr.Examples(example_sentences, inputs=[input_textbox])
 
104
 
105
  if __name__ == "__main__":
106
  demo.launch()
examples.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Sample inputs offered in the UI, one sentence per entry, covering several
# languages and scripts.
example_sentences = [
    "Je te rencontre au café",
    "Répétez s'il vous plaît.",
    "The mountains stand tall, embracing the clouds with their majestic peaks.",
    "सितारों का आकाश में खोया होने का एहसास मन को अद्वितीय सुख देता है।",
    "ਜਟ ਦਾ ਮੁਕਾਬਲਾ ਦਸ ਮੈਨੂੰ ਕਿਥੇ ਆ ਨੀ।",
    "Il profumo dei fiori primaverili riempie l'aria, portando gioia e speranza.",
    "Güneş batarken, gökyüzünü altın rengine boyuyor ve doğayı sihirli bir atmosfere bürüyor.",
    "De wind fluistert door de bomen, een symfonie van rust en harmonie.",
    "눈이 하얗게 내리고, 숲은 고요로움으로 가득 차 있습니다.",
    "הכוכבים מאירים בשמי הלילה, משאירים את הלב פתוח לקסמם.",
    "Hương hoa lan tỏa trong không khí, mang lại cảm giác êm đềm và sự bình yên.",
    "Regnet faller mjukt mot marken, skapar en känsla av förnyelse och friskhet.",
    "Η θάλασσα χτυπά την ακτή με απαλές κύματα, φέρνοντας ηρεμία και γαλήνη στην ψυχή.",
]
iso639_wrapper.py CHANGED
@@ -1,4 +1,5 @@
1
  from iso639 import Lang, iter_langs
 
2
 
3
 
4
  langs = [lang for lang in iter_langs()]
@@ -24,6 +25,27 @@ iso5_name_to_code = {lg.name: lg.pt5 for lg in langs}
24
  # https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/master/README.md#in-more-detail
25
  helsinki_precendence = ["iso3", "iso5", "iso1", "iso2t", "iso2b"]
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def get_name_from_iso_code(iso_code, precedence=helsinki_precendence):
28
  for code_type in precedence:
29
  if code_type == "iso1" and iso_code in iso1_code_to_name.keys():
 
1
  from iso639 import Lang, iter_langs
2
+ from regex import R
3
 
4
 
5
  langs = [lang for lang in iter_langs()]
 
25
  # https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/master/README.md#in-more-detail
26
  helsinki_precendence = ["iso3", "iso5", "iso1", "iso2t", "iso2b"]
27
 
28
# Display-name overrides: name as produced by the lookup -> name shown to users.
rename_dict = {"Panjabi": "Punjabi"}


def rename_languages(language):
    """Return the preferred display name for ``language``.

    Names without an entry in ``rename_dict`` are returned unchanged.
    """
    return rename_dict.get(language, language)


def rename_return_value(func):
    """Decorate ``func`` so language names in its result are renamed.

    ``rename_languages`` is applied to a string result, to every item of a
    list result and to every value of a dict result (keys are kept). Results
    of any other type are returned untouched.
    """
    # Function-scope import keeps this block self-contained.
    from functools import wraps

    # ``wraps`` preserves the decorated function's name and docstring, which
    # would otherwise be replaced by those of ``wrapper``.
    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        if isinstance(result, str):
            return rename_languages(result)
        if isinstance(result, list):
            return [rename_languages(item) for item in result]
        if isinstance(result, dict):
            return {key: rename_languages(value) for key, value in result.items()}
        return result

    return wrapper
47
+
48
+ @rename_return_value
49
  def get_name_from_iso_code(iso_code, precedence=helsinki_precendence):
50
  for code_type in precedence:
51
  if code_type == "iso1" and iso_code in iso1_code_to_name.keys():
language_directions.py CHANGED
@@ -2,12 +2,12 @@ from helsinki_models import helsinki_models, get_clearly_formatted_langauge_dire
2
  from iso639_wrapper import get_name_from_iso_code
3
  from language_detection import detect_language
4
  from collections import OrderedDict
5
- from utils import convert_keys_to_lowercase
6
 
7
 
8
  def get_all_source_languages():
9
  """
10
- Returns a human-readable `dict of source languages names to codes`
11
  based on the available models.
12
  """
13
  source_languages = {}
@@ -23,6 +23,9 @@ def get_all_source_languages():
23
  { **{'Auto Detect' : 'Auto Detect'}, **source_languages}
24
  return all_source_langs_including_auto_detect
25
 
 
 
 
26
  def get_target_languages(source_language_code, input_text=None):
27
  """
28
  Returns a human-readable `dict of target languages names to codes`
@@ -40,26 +43,24 @@ def get_target_languages(source_language_code, input_text=None):
40
  target_language_name = get_name_from_iso_code(target_language)
41
  if target_language_name:
42
  target_languages[target_language_name] = target_language
43
- return OrderedDict(sorted(target_languages.items()))
44
 
45
  def auto_detect_language_code(input_text):
 
 
46
  if not input_text:
47
- return None, True
48
- language = detect_language(input_text)
49
- if language == "unknown":
50
- print("unknown tesxt for ", input_text)
51
- return "unknown", True
52
- elif language in list(get_all_source_languages().keys())\
53
- or language.lower() in [k.lower() for k in list(get_all_source_languages().keys())]:
54
- source_languages_dict = convert_keys_to_lowercase(get_all_source_languages())
55
- source_language_code = source_languages_dict.get(language.lower())
56
- return source_language_code, False
57
- elif language in list(get_all_source_languages().values())\
58
- or language.lower() in [k.lower() for k in list(get_all_source_languages().values())]:
59
- source_language_code = language
60
- return source_language_code, False
61
  else:
62
- raise ValueError(f"Language {source_language_code} not supported")
 
 
 
 
 
 
63
 
64
 
65
  # Example usage:
 
2
  from iso639_wrapper import get_name_from_iso_code
3
  from language_detection import detect_language
4
  from collections import OrderedDict
5
+ from utils import convert_keys_to_lowercase, match_in_keys, match_in_values
6
 
7
 
8
  def get_all_source_languages():
9
  """
10
+ Returns a human-readable `dict source_languages_names:codes`
11
  based on the available models.
12
  """
13
  source_languages = {}
 
23
  { **{'Auto Detect' : 'Auto Detect'}, **source_languages}
24
  return all_source_langs_including_auto_detect
25
 
26
def update_source_languages_dict(source_languages_dict, auto_detected_language):
    """Register a detected-language label in ``source_languages_dict``.

    The dict is modified in place: ``auto_detected_language`` becomes a key
    whose value is ``"Auto Detect"``. Nothing is returned.
    """
    source_languages_dict.update({auto_detected_language: "Auto Detect"})
28
+
29
  def get_target_languages(source_language_code, input_text=None):
30
  """
31
  Returns a human-readable `dict of target languages names to codes`
 
43
  target_language_name = get_name_from_iso_code(target_language)
44
  if target_language_name:
45
  target_languages[target_language_name] = target_language
46
+ return OrderedDict(sorted(target_languages.items())), source_language_code
47
 
48
def auto_detect_language_code(input_text):
    """Detect the source language of ``input_text``.

    Args:
        input_text: Text to analyse; may be empty or ``None``.

    Returns:
        A ``(language, used_default)`` tuple. ``language`` is the match found
        in the available source languages, or ``"en"`` when the text is
        empty, the detector reports ``"unknown"`` or nothing matches.
        ``used_default`` is ``True`` exactly when that default is returned.
    """
    DEFAULT_SOURCE_LANGUAGE = "en"
    if not input_text:
        return DEFAULT_SOURCE_LANGUAGE, True

    language_or_code = detect_language(input_text)
    # A falsy detector result is treated like "unknown" so the matching
    # helpers are never handed a value they cannot lowercase.
    if not language_or_code or language_or_code == "unknown":
        return DEFAULT_SOURCE_LANGUAGE, True

    # Build the language table once and reuse it for both lookups.
    all_source_languages = get_all_source_languages()
    detected_language_string = match_in_keys(all_source_languages, language_or_code)
    if not detected_language_string:
        detected_language_string = match_in_values(all_source_languages, language_or_code)

    if detected_language_string:
        return detected_language_string, False
    return DEFAULT_SOURCE_LANGUAGE, True
64
 
65
 
66
  # Example usage:
project-notes.md CHANGED
@@ -1,4 +1,6 @@
1
  # Scope of project
2
  1. Enable multiple languages translate based on helsinki models.✅
3
  2. Enable auto detect language ✅
4
- 3. Show error message instead of gradio error
 
 
 
1
  # Scope of project
2
  1. Enable multiple languages translate based on helsinki models.✅
3
  2. Enable auto detect langauge ✅
4
+ 3. Show error message instead of gradio error
5
+ 4. Add examples ✅
6
+ 5. Auto detect on text change ✅
utils.py CHANGED
@@ -1,2 +1,18 @@
 
 
 
1
  def convert_keys_to_lowercase(input_dict):
2
- return {key.lower(): value for key, value in input_dict.items()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import cache
2
+
3
+
4
def convert_keys_to_lowercase(input_dict):
    """Return a copy of ``input_dict`` whose string keys are lowercased.

    Non-string keys are kept as they are instead of raising on ``.lower()``.
    When two keys differ only by case, the later entry wins.
    """
    return {
        (key.lower() if isinstance(key, str) else key): value
        for key, value in input_dict.items()
    }
6
+
7
def match_in_keys(dictionary, search_string):
    """Look up ``search_string`` among the keys of ``dictionary``, ignoring case.

    An exact (case-insensitive) key match is preferred. Otherwise the first
    key that starts with ``search_string`` is used.

    Args:
        dictionary: Mapping whose string keys are searched.
        search_string: Text to look for; may be empty or ``None``.

    Returns:
        The value stored under the matching key, or ``None`` when there is no
        match or ``search_string`` is empty.
    """
    # An empty needle is a prefix of every key and would "match" the first
    # entry, so it is rejected up front.
    if not search_string:
        return None
    needle = search_string.lower()
    lowercase_dict = {
        key.lower(): value
        for key, value in dictionary.items()
        if isinstance(key, str)
    }
    if needle in lowercase_dict:
        return lowercase_dict[needle]
    for lowered_key, value in lowercase_dict.items():
        if lowered_key.startswith(needle):
            return value
    return None
14
+
15
def match_in_values(dictionary, search_string):
    """Look up ``search_string`` among the values of ``dictionary``, ignoring case.

    Args:
        dictionary: Mapping whose string values are searched.
        search_string: Text to look for; may be empty or ``None``.

    Returns:
        The matching value exactly as stored in ``dictionary``, or ``None``
        when no value equals ``search_string`` case-insensitively.
    """
    if not search_string:
        return None
    needle = search_string.lower()
    for value in dictionary.values():
        # Both sides are lowercased. Lowering only the keys left the values
        # untouched, so the comparison was never case-insensitive.
        if isinstance(value, str) and value.lower() == needle:
            return value
    return None