andromeda01111 committed on
Commit
256a489
·
verified ·
1 Parent(s): 5c2ef6f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -0
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Hugging Face's logo
2
+ Hugging Face
3
+ Search models, datasets, users...
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Posts
8
+ Docs
9
+ Enterprise
10
+ Pricing
11
+
12
+
13
+
14
+ Spaces:
15
+
16
+ qanastek
17
+ /
18
+ Alexa-NLU-Clone
19
+
20
+
21
+ like
22
+ 7
23
+ App
24
+ Files
25
+ Community
26
+ Alexa-NLU-Clone
27
+ /
28
+ app.py
29
+
30
+ qanastek's picture
31
+ qanastek
32
+ Update
33
+ 51be472
34
+ over 2 years ago
35
+ raw
36
+
37
+ Copy download link
38
+ history
39
+ blame
40
+ contribute
41
+ delete
42
+
43
+ 4.87 kB
44
+ import gradio as gr
45
+
46
+ import os
47
+ import torch
48
+ import librosa
49
+ from glob import glob
50
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline, AutoModelForTokenClassification, TokenClassificationPipeline, Wav2Vec2ForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
51
+
52
# Sampling rate (Hz) used when loading audio for the ASR models.
SAMPLE_RATE = 16_000

# Lazily filled cache: language code -> {"processor": ..., "model": ...}.
models = {}

# Every ASR checkpoint lives under the same hub namespace/prefix; only the
# language suffix differs.
_ASR_CHECKPOINT_PREFIX = "jonatasgrosman/wav2vec2-large-xlsr-53-"

# Language code -> wav2vec2 checkpoint identifier.
models_paths = {
    code: _ASR_CHECKPOINT_PREFIX + suffix
    for code, suffix in (
        ("en-US", "english"),
        ("fr-FR", "french"),
        ("nl-NL", "dutch"),
        ("pl-PL", "polish"),
        ("it-IT", "italian"),
        ("ru-RU", "russian"),
        ("pt-PT", "portuguese"),
        ("de-DE", "german"),
        ("es-ES", "spanish"),
        ("ja-JP", "japanese"),
        ("ar-SA", "arabic"),
        ("fi-FI", "finnish"),
        ("hu-HU", "hungarian"),
        ("zh-CN", "chinese-zh-cn"),
        ("el-GR", "greek"),
    )
}
73
+
74
# --- Intent classification pipeline -------------------------------------
model_name = 'qanastek/XLMRoberta-Alexa-Intents-Classification'
tokenizer_intent = AutoTokenizer.from_pretrained(model_name)
model_intent = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier_intent = TextClassificationPipeline(
    model=model_intent,
    tokenizer=tokenizer_intent,
)

# --- Language identification pipeline ------------------------------------
model_name = 'qanastek/51-languages-classifier'
tokenizer_langs = AutoTokenizer.from_pretrained(model_name)
model_langs = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier_language = TextClassificationPipeline(
    model=model_langs,
    tokenizer=tokenizer_langs,
)

# --- Slot / named-entity tagging pipeline --------------------------------
model_name = 'qanastek/XLMRoberta-Alexa-Intents-NER-NLU'
tokenizer_ner = AutoTokenizer.from_pretrained(model_name)
model_ner = AutoModelForTokenClassification.from_pretrained(model_name)
predict_ner = TokenClassificationPipeline(
    model=model_ner,
    tokenizer=tokenizer_ner,
)
91
+
92
+ EXAMPLE_DIR = './wavs/'
93
+ examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav')))
94
+ examples = [[e, e.split("=")[0].split("/")[-1]] for e in examples]
95
+
96
def transcribe(audio_path, lang_code):
    """Transcribe an audio file with the ASR checkpoint for ``lang_code``.

    The processor/model pair for a language is loaded on first use and then
    kept in the module-level ``models`` cache.

    Args:
        audio_path: Path of the audio file to decode (read with librosa).
        lang_code: Key of ``models_paths`` selecting the ASR checkpoint.

    Returns:
        The decoded text of the first (only) item in the batch.

    Raises:
        KeyError: If ``lang_code`` is not a key of ``models_paths``.
    """
    # Load at the module-wide sampling rate so the audio and the processor
    # call below always agree.
    speech_array, _ = librosa.load(audio_path, sr=SAMPLE_RATE)

    if lang_code not in models:
        checkpoint = models_paths[lang_code]
        # Load both parts BEFORE touching the cache: if either load fails,
        # no half-initialised entry is left behind to break later calls.
        processor = Wav2Vec2Processor.from_pretrained(checkpoint)
        model = Wav2Vec2ForCTC.from_pretrained(checkpoint)
        models[lang_code] = {"processor": processor, "model": model}

    processor_asr = models[lang_code]["processor"]
    model_asr = models[lang_code]["model"]

    inputs = processor_asr(
        speech_array,
        sampling_rate=SAMPLE_RATE,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model_asr(
            inputs.input_values,
            attention_mask=inputs.attention_mask,
        ).logits

    # Greedy decoding: pick the highest-scoring id along the last axis.
    predicted_ids = torch.argmax(logits, dim=-1)

    return processor_asr.batch_decode(predicted_ids)[0]
117
+
118
def getUniform(text):
    """Group token-level tag predictions into labelled word groups.

    Args:
        text: Iterable of dicts, each with an ``"entity"`` tag (such as
            ``"B-date"`` or ``"I-date"``) and a ``"word"`` piece.

    Returns:
        A list of ``(label, words)`` tuples in order of appearance, where
        ``label`` is the tag without its ``B-``/``I-`` prefix and ``words``
        is the list of word pieces (``"▁"`` markers removed) in that group.
    """
    groups = []

    for token in text:
        tag = token["entity"]
        word = token["word"].replace("▁", "")
        label = tag[2:] if tag.startswith(("B-", "I-")) else tag

        # Open a new group on an explicit "B-" tag, and also when a
        # continuation has nothing to attach to (no group yet, or the
        # current group carries another label). The previous key/index
        # bookkeeping raised KeyError on every continuation token.
        starts_group = (
            tag.startswith("B-")
            or not groups
            or groups[-1][0] != label
        )
        if starts_group:
            groups.append((label, [word]))
        else:
            groups[-1][1].append(word)

    return groups
137
+
138
+
139
def predict(wav_file, lang_code):
    """Transcribe a recording and run the three NLU predictions on the text.

    Args:
        wav_file: Path of the audio file to analyse, or ``None`` when no
            recording was supplied.
        lang_code: Key of ``models_paths`` selecting the ASR checkpoint.

    Returns:
        On success, a dict with the keys ``"text"``, ``"language"``,
        ``"intent_class"`` and ``"named_entities"``. Otherwise a dict with a
        single ``"error"`` key holding a human-readable message.
    """
    if lang_code not in models_paths:
        # Return a dict, not a set literal: a set cannot be serialised to
        # JSON for the JSON output component.
        return {"error": "The language code is unknown!"}

    if wav_file is None:
        # Nothing to load; report it the same way as the other input error.
        return {"error": "No audio file was provided!"}

    # "apizza" -> "a pizza" patches a specific ASR output; the trailing " ."
    # is kept from the original (presumably the form the NLU models expect
    # -- TODO confirm).
    text = transcribe(wav_file, lang_code).replace("apizza", "a pizza") + " ."

    intent_class = classifier_intent(text)[0]["label"]
    language_class = classifier_language(text)[0]["label"]
    named_entities = getUniform(predict_ner(text))

    return {
        "text": text,
        "language": language_class,
        "intent_class": intent_class,
        "named_entities": named_entities,
    }
159
+
160
# Web UI: a microphone recording plus a language dropdown go into predict();
# the resulting dict is shown as JSON.
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Audio(label='wav file', source='microphone', type='filepath'),
        gr.inputs.Dropdown(choices=list(models_paths.keys())),
    ],
    outputs=[
        gr.outputs.JSON(label='ASR -> Slot Recognition + Intent Classification + Language Classification'),
    ],
    examples=examples,
    title='Sentiment Analysis project',
    description='Upload your wav file to test the models (<i>First execution take about 20s to 30s, then next run in less than 1s</i>)',
    article='Sentiment Analysis project',
)

# Start serving the interface.
iface.launch()