puettmann committed on
Commit
8d9fc2f
·
verified ·
1 Parent(s): 139cee5

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +72 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ import spacy
5
+
6
class ModelSingleton:
    """Single shared holder for the spaCy pipelines and translation models.

    Every call to ``ModelSingleton()`` returns the same object. The pipelines,
    tokenizers and seq2seq models are loaded the first time ``__init__`` runs
    on that object; later constructions find the ``initialized`` flag and
    return without loading anything again.
    """

    # The one shared instance; stays None until the first construction.
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Hand back the existing instance when there is one.
        if cls._instance:
            return cls._instance
        cls._instance = super().__new__(cls, *args, **kwargs)
        return cls._instance

    def __init__(self):
        # __init__ runs on every ModelSingleton() call, so bail out once the
        # resources have been loaded.
        if hasattr(self, 'initialized'):
            return

        # spaCy pipelines (used by the app for sentence splitting).
        self.nlp_en = spacy.load("en_core_web_sm")
        self.nlp_it = spacy.load("it_core_news_sm")

        # Translation tokenizers and models, one pair per direction.
        en_it_repo = "puettmann/Foglietta-mt-en-it"
        self.tokenizer_en_it = AutoTokenizer.from_pretrained(en_it_repo)
        self.model_en_it = AutoModelForSeq2SeqLM.from_pretrained(
            en_it_repo, torch_dtype=torch.bfloat16
        )

        it_en_repo = "puettmann/Foglietta-mt-it-en"
        self.tokenizer_it_en = AutoTokenizer.from_pretrained(it_en_repo)
        self.model_it_en = AutoModelForSeq2SeqLM.from_pretrained(
            it_en_repo, torch_dtype=torch.bfloat16
        )

        # Set last, so a failed load is retried on the next construction.
        self.initialized = True
27
+
28
# Build the shared instance at import time; constructing it loads the spaCy
# pipelines and both translation models, which the functions below reuse.
model_singleton = ModelSingleton()
29
+
30
def generate_response_en_it(input_text):
    """Translate a piece of English text into Italian.

    The text is prefixed with the task instruction, tokenized to PyTorch
    tensors and passed to the English-to-Italian model's ``generate`` with a
    cap of 256 new tokens. The first generated sequence is decoded with
    special tokens removed and returned as a string.
    """
    tokenizer = model_singleton.tokenizer_en_it
    prompt = "translate English to Italian: " + input_text
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model_singleton.model_en_it.generate(
        encoded.input_ids, max_new_tokens=256
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
34
+
35
def generate_response_it_en(input_text):
    """Translate a piece of Italian text into English.

    The text is prefixed with the task instruction, tokenized to PyTorch
    tensors and passed to the Italian-to-English model's ``generate`` with a
    cap of 256 new tokens. The first generated sequence is decoded with
    special tokens removed and returned as a string.
    """
    tokenizer = model_singleton.tokenizer_it_en
    prompt = "translate Italian to English: " + input_text
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model_singleton.model_it_en.generate(
        encoded.input_ids, max_new_tokens=256
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
39
+
40
def translate_text(input_text, direction):
    """Translate ``input_text`` sentence by sentence.

    Args:
        input_text: The text to translate. ``None``, empty and
            whitespace-only values yield an empty translation.
        direction: ``"en-it"`` for English to Italian or ``"it-en"`` for
            Italian to English.

    Returns:
        The translated sentences joined with single spaces, an empty string
        when there is nothing to translate, or the message
        ``"Invalid direction selected."`` for an unknown direction.
    """
    # Validate the direction first so a bad selection is always reported,
    # whatever the text is.
    if direction not in ("en-it", "it-en"):
        return "Invalid direction selected."

    # Nothing to translate: return early instead of sending a blank prompt to
    # the model (and avoid passing None to spaCy).
    if not input_text or not input_text.strip():
        return ""

    if direction == "en-it":
        nlp = model_singleton.nlp_en
        generate_response = generate_response_en_it
    else:
        nlp = model_singleton.nlp_it
        generate_response = generate_response_it_en

    # Split into sentences with the source-language pipeline and translate
    # each one separately; blank spans are skipped so the model never
    # receives an instruction-only prompt.
    doc = nlp(input_text)
    sentence_translations = [
        generate_response(sent.text)
        for sent in doc.sents
        if sent.text.strip()
    ]
    return " ".join(sentence_translations)
60
+
61
# Create the Gradio interface
iface = gr.Interface(
    fn=translate_text,
    inputs=[
        gr.Textbox(lines=5, placeholder="Enter text to translate...", label="Input Text"),
        # Preselect a direction explicitly: depending on the Gradio version,
        # a dropdown without a value may submit None, which translate_text
        # reports as "Invalid direction selected.".
        gr.Dropdown(choices=["en-it", "it-en"], value="en-it", label="Translation Direction"),
    ],
    outputs=gr.Textbox(lines=5, label="Translation"),
    description=(
        "Translation using the super small Foglietta models. "
        "Initialization might take a couple of seconds the first time. "
        "This Space uses the Foglietta models for it-en and en-it text translation tasks."
    ),
)

# Launch the interface
iface.launch()
72
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ spacy
4
+ spaces
5
+ torch
6
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
7
+ it-core-news-sm @ https://github.com/explosion/spacy-models/releases/download/it_core_news_sm-3.8.0/it_core_news_sm-3.8.0-py3-none-any.whl