Paulie-Aditya committed on
Commit
896d0ef
·
1 Parent(s): 408fe9a

hoping this works

Browse files
Files changed (2) hide show
  1. .gitignore +4 -0
  2. app.py +95 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *.gitattributes
2
+ *.venv
3
+ *.vscode
4
+ secrets_file.py
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import requests
import nltk
from nltk import sent_tokenize
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# sent_tokenize() requires the NLTK "punkt" tokenizer data; fetch it once if
# it is not already installed so the app does not crash on first request.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt", quiet=True)

# NOTE(review): the original file also built an unused text2text-generation
# pipeline on the same checkpoint here (`pipe = pipeline(...)`), eagerly
# loading the 13B model a second time; it was never referenced, so it has
# been removed. The model proper is loaded lazily via load_model().
tokenizer = MBart50TokenizerFast.from_pretrained(
    "SnypzZz/Llama2-13b-Language-translate", src_lang="en_XX"
)
model = None          # lazily-initialized MBartForConditionalGeneration
model_loaded = False  # flipped to True by load_model()

# secrets_file.py (git-ignored, see .gitignore) supplies the HF API token.
from secrets_file import api_token_header
def load_model():
    """Load the MBart translation model once and cache it in module globals.

    Sets the module-level ``model`` and ``model_loaded`` flags as a side
    effect and returns the loaded ``MBartForConditionalGeneration`` instance.
    """
    global model, model_loaded
    model = MBartForConditionalGeneration.from_pretrained(
        "SnypzZz/Llama2-13b-Language-translate"
    )
    model_loaded = True
    return model
def translation(text, dest_lang, dest_lang_code, src_lang_code):
    """Translate a single sentence from ``src_lang_code`` to ``dest_lang_code``.

    English -> Bengali is delegated to the hosted banglat5 model via the HF
    Inference API; every other language pair runs the local MBart model,
    which is loaded lazily on first use.

    Returns the translated string, or a human-readable message when the two
    language codes are identical or the remote API reports an error.
    """
    if dest_lang_code == src_lang_code:
        return "Please select different languages to translate between."

    headers = {"Authorization": f"Bearer {api_token_header}"}

    # Bengali Done
    if dest_lang == "Bengali" and src_lang_code == "en_XX":
        api_url = "https://api-inference.huggingface.co/models/csebuetnlp/banglat5_nmt_en_bn"
        response = requests.post(api_url, headers=headers, json={"inputs": text})
        output = response.json()
        print(output)
        # On success the API returns [{"translation_text": ...}]; on failure
        # (model loading, bad token, rate limit) it returns a dict such as
        # {"error": ...}. The original indexed blindly and crashed there.
        if isinstance(output, list) and output and "translation_text" in output[0]:
            return output[0]["translation_text"]
        return f"Translation service error: {output}"

    global model
    if model is None:  # lazy one-time load of the heavy MBart model
        model = load_model()
    loaded_model = model

    # NOTE(review): the tokenizer is re-instantiated on every call because
    # src_lang varies; consider caching per src_lang_code if this is slow.
    tokenizer = MBart50TokenizerFast.from_pretrained(
        "SnypzZz/Llama2-13b-Language-translate", src_lang=src_lang_code
    )
    loaded_model_inputs = tokenizer(text, return_tensors="pt")

    # translate: force the decoder to start in the destination language
    generated_tokens = loaded_model.generate(
        **loaded_model_inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[dest_lang_code],
    )
    output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    print(output)
    return output[0]
def main_translation(text, dest_lang_code, src_lang_code):
    """Translate ``text`` sentence-by-sentence.

    Splits the input with NLTK's sentence tokenizer, translates each sentence
    via translation(), and returns ``{"output": <translated text>}``.
    Empty input yields ``{"output": ""}``.
    """
    codes = {
        "en_XX": "English",
        "bn_IN": "Bengali",
        "en_GB": "English",
        "gu_IN": "Gujarati",
        "hi_IN": "Hindi",
        "ta_IN": "Tamil",
        "te_IN": "Telugu",
        "mr_IN": "Marathi",
    }
    dest_lang = codes[dest_lang_code]

    sentences = sent_tokenize(text)
    pieces = [
        translation(line, dest_lang, dest_lang_code, src_lang_code)
        for line in sentences
    ]
    # Join with a space: the original concatenated sentences back-to-back,
    # producing run-on output with no boundary between sentences.
    return {"output": " ".join(pieces)}
def test(text, src, dest):
    """Gradio callback: translate *text* from code *src* into code *dest*."""
    result = main_translation(text, dest, src)
    return result["output"]


# Shared (label, language-code) choices for both dropdowns.
_LANG_CHOICES = [
    ("English", "en_XX"),
    ("Hindi", "hi_IN"),
    ("Bengali", "bn_IN"),
    ("Gujarati", "gu_IN"),
    ("Tamil", "ta_IN"),
    ("Telugu", "te_IN"),
    ("Marathi", "mr_IN"),
]

demo = gr.Interface(
    test,
    [
        "textbox",
        gr.Dropdown(
            _LANG_CHOICES, label="Source", info="Select the Source Language!"
        ),
        gr.Dropdown(
            _LANG_CHOICES, label="Destination", info="Select the Destination Language!"
        ),
    ],
    outputs=["textbox"],
)

demo.launch()