DHEIVER committed on
Commit a58afc9 · verified · 1 Parent(s): 89d8261

Update app.py

Files changed (1)
  1. app.py +154 -267
app.py CHANGED
@@ -3,326 +3,213 @@ import torch
 import torchaudio
 import numpy as np
 from transformers import AutoProcessor, SeamlessM4Tv2Model
-from datetime import datetime
-import time

-class ARISTranslator:
-    def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
-        self.processor = AutoProcessor.from_pretrained(model_name)
-        self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
         self.sample_rate = self.model.config.sampling_rate
-
-        self.language_codes = {
-            "English (US)": "eng",
-            "Spanish (ES)": "spa",
-            "French (FR)": "fra",
-            "German (DE)": "deu",
-            "Italian (IT)": "ita",
-            "Portuguese (BR)": "por",
-            "Russian (RU)": "rus",
-            "Chinese (CN)": "cmn",
-            "Japanese (JP)": "jpn",
-            "Korean (KR)": "kor",
-            "Hindi (IN)": "hin",
-            "Arabic (AR)": "ara"
         }

-    def process_audio(self, audio_path: str, tgt_lang: str) -> tuple[int, np.ndarray]:
         try:
-            if audio_path is None:
-                raise gr.Error("No audio input provided")
-
-            # Load and resample the audio
             audio, orig_freq = torchaudio.load(audio_path)
             audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16000)

-            # Process the audio through the model
             inputs = self.processor(audios=audio, return_tensors="pt")
-            audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
-            return self.sample_rate, audio_array
-        except Exception as e:
-            raise gr.Error(f"Audio processing failed: {str(e)}")
-
-    def translate_text(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
-        try:
-            if not text.strip():
-                raise gr.Error("No text input provided")
-
-            inputs = self.processor(text=text, src_lang=self.language_codes[src_lang], return_tensors="pt")
-            audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
-            return self.sample_rate, audio_array
         except Exception as e:
-            raise gr.Error(f"Translation failed: {str(e)}")

 css = """
-:root {
-    --primary: #00ffff;
-    --secondary: #0066cc;
-    --accent: #ff3366;
-    --background: #000000;
-    --text: #ffffff;
-}
-
-#aris-interface {
-    background-color: var(--background);
-    background-image:
-        radial-gradient(circle at 20% 20%, rgba(0, 102, 204, 0.1) 0%, transparent 50%),
-        radial-gradient(circle at 80% 80%, rgba(0, 255, 255, 0.1) 0%, transparent 50%);
-    min-height: 100vh;
-    font-family: 'Courier New', monospace;
     padding: 20px;
 }

-.title-container {
-    text-align: center;
-    color: var(--primary);
-    margin-bottom: 30px;
-    position: relative;
-}
-
-.title-container h1 {
-    font-size: 3em;
-    letter-spacing: 10px;
-    margin: 0;
-    text-shadow: 0 0 10px var(--primary);
-}
-
-.title-container h3 {
-    font-size: 1.2em;
-    letter-spacing: 3px;
-    opacity: 0.8;
-    margin: 5px 0;
 }

-#status-ring {
-    width: 400px;
-    height: 400px;
-    border: 4px solid var(--primary);
-    border-radius: 50%;
-    margin: 20px auto;
-    position: relative;
-    animation: pulse 2s infinite;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    background:
-        radial-gradient(circle at center, rgba(0, 255, 255, 0.1) 0%, transparent 70%),
-        conic-gradient(from 0deg, transparent 0%, rgba(0, 255, 255, 0.1) 50%, transparent 100%);
 }

-#outer-ring-decoration {
-    position: absolute;
-    width: 420px;
-    height: 420px;
-    border-radius: 50%;
-    border: 1px solid rgba(0, 255, 255, 0.3);
-    animation: rotate 20s linear infinite;
 }

-@keyframes rotate {
-    from { transform: rotate(0deg); }
-    to { transform: rotate(360deg); }
 }

-@keyframes pulse {
-    0% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0.4); }
-    70% { box-shadow: 0 0 0 20px rgba(0, 255, 255, 0); }
-    100% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0); }
 }

-.aris-textbox {
-    background-color: rgba(0, 0, 0, 0.8) !important;
-    border: 2px solid var(--primary) !important;
-    color: var(--primary) !important;
-    font-family: 'Courier New', monospace !important;
-    border-radius: 5px !important;
-    padding: 10px !important;
 }

-.aris-button {
-    background-color: transparent !important;
-    border: 2px solid var(--primary) !important;
-    color: var(--primary) !important;
-    font-family: 'Courier New', monospace !important;
-    text-transform: uppercase !important;
-    letter-spacing: 2px !important;
-    padding: 12px 24px !important;
-    border-radius: 5px !important;
-    transition: all 0.3s ease !important;
 }

-.aris-button:hover {
-    background-color: rgba(0, 255, 255, 0.1) !important;
-    box-shadow: 0 0 15px rgba(0, 255, 255, 0.3) !important;
-    transform: translateY(-2px) !important;
 }

-.status-box {
-    background-color: rgba(0, 0, 0, 0.8) !important;
-    border: 2px solid var(--primary) !important;
-    color: var(--primary) !important;
-    padding: 15px !important;
-    border-radius: 5px !important;
-    margin: 5px !important;
-    text-align: center !important;
-    text-transform: uppercase !important;
-    letter-spacing: 1px !important;
-    transition: all 0.3s ease !important;
-    position: relative;
-    overflow: hidden;
 }

-.status-box::before {
-    content: '';
-    position: absolute;
-    top: 0;
-    left: -100%;
-    width: 100%;
-    height: 2px;
-    background: linear-gradient(90deg, transparent, var(--primary));
-    animation: scan-line 2s linear infinite;
 }

-@keyframes scan-line {
-    0% { left: -100%; }
-    100% { left: 100%; }
 }
 """

-def create_interface():
-    translator = ARISTranslator()
-
-    def update_status():
-        return (
-            f"A.R.I.S. CORE v2.0.0\n"
-            f"Time: {datetime.now().strftime('%H:%M:%S')}\n"
-            f"Neural Engine: ACTIVE\n"
-            f"Translation Matrix: OPERATIONAL"
         )
-
-    with gr.Blocks(css=css, title="A.R.I.S. - Advanced Real-time Interpretation System") as demo:
-        gr.HTML('''
-            <div class="title-container">
-                <h1>A.R.I.S.</h1>
-                <h3>Advanced Real-time Interpretation System</h3>
-                <div class="mode-indicator">QUANTUM CORE ACTIVE</div>
-            </div>
-        ''')
-
-        with gr.Column(elem_id="aris-interface"):
-            gr.HTML("""
-                <div id="status-ring">
-                    <div id="outer-ring-decoration"></div>
-                    <div id="inner-ring">
-                        <div id="core">
-                            <div>A.R.I.S.</div>
-                            <div>QUANTUM CORE</div>
-                            <div>v2.0.0</div>
-                            <div class="system-version">NEURAL ENGINE ACTIVE</div>
-                        </div>
-                    </div>
-                </div>
-            """)
-
-            with gr.Row():
-                with gr.Column():
-                    with gr.Tab("Text Translation"):
                         text_input = gr.Textbox(
-                            label="INPUT TEXT",
-                            placeholder="Enter text for translation...",
-                            elem_classes=["aris-textbox"],
-                            lines=3
                         )
                         with gr.Row():
-                            src_lang_text = gr.Dropdown(
-                                choices=list(translator.language_codes.keys()),
-                                value="English (US)",
-                                label="SOURCE LANGUAGE",
-                                elem_classes=["aris-textbox"]
                             )
-                            tgt_lang_text = gr.Dropdown(
-                                choices=list(translator.language_codes.keys()),
-                                value="Spanish (ES)",
-                                label="TARGET LANGUAGE",
-                                elem_classes=["aris-textbox"]
                             )
-                        translate_btn = gr.Button("▶ TRANSLATE TEXT", elem_classes=["aris-button"])
-
-                    with gr.Tab("Audio Translation"):
                         audio_input = gr.Audio(
-                            label="AUDIO INPUT",
                             type="filepath"
                         )
                         tgt_lang_audio = gr.Dropdown(
-                            choices=list(translator.language_codes.keys()),
-                            value="English (US)",
-                            label="TARGET LANGUAGE",
-                            elem_classes=["aris-textbox"]
                         )
-                        translate_audio_btn = gr.Button(" TRANSLATE AUDIO", elem_classes=["aris-button"])
-
-                with gr.Column():
-                    audio_output = gr.Audio(
-                        label="TRANSLATION OUTPUT",
-                        type="numpy"
-                    )
-
-            with gr.Row():
-                with gr.Column(min_width=200):
-                    gr.HTML(
-                        """
-                        <div class="status-box">
-                            NEURAL CORE<br>
-                            <strong>OPERATIONAL</strong>
-                        </div>
-                        """
-                    )
-                with gr.Column(min_width=200):
-                    gr.HTML(
-                        """
-                        <div class="status-box">
-                            QUANTUM ENGINE<br>
-                            <strong>ACTIVE</strong>
-                        </div>
-                        """
-                    )
-
-            with gr.Row():
-                with gr.Column(min_width=200):
-                    gr.HTML(
-                        """
-                        <div class="status-box">
-                            TRANSLATION MATRIX<br>
-                            <strong>CALIBRATED</strong>
-                        </div>
-                        """
-                    )
-                with gr.Column(min_width=200):
-                    gr.HTML(
-                        """
-                        <div class="status-box">
-                            VOICE SYNTHESIS<br>
-                            <strong>READY</strong>
-                        </div>
-                        """
-                    )

-        # Event handlers
-        translate_btn.click(
-            fn=translator.translate_text,
-            inputs=[text_input, src_lang_text, tgt_lang_text],
-            outputs=audio_output
-        )
-
-        translate_audio_btn.click(
-            fn=translator.process_audio,
-            inputs=[audio_input, tgt_lang_audio],
-            outputs=audio_output
-        )
-
     return demo

 if __name__ == "__main__":
-    demo = create_interface()
     demo.queue()
     demo.launch()

 import torchaudio
 import numpy as np
 from transformers import AutoProcessor, SeamlessM4Tv2Model

+class SeamlessTranslator:
+    def __init__(self):
+        self.model_name = "facebook/seamless-m4t-v2-large"
+        print("Loading model...")
+        self.processor = AutoProcessor.from_pretrained(self.model_name)
+        self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
         self.sample_rate = self.model.config.sampling_rate
+
+        self.languages = {
+            "English": "eng",
+            "Spanish": "spa",
+            "French": "fra",
+            "German": "deu",
+            "Italian": "ita",
+            "Portuguese": "por",
+            "Russian": "rus",
+            "Chinese": "cmn",
+            "Japanese": "jpn",
+            "Korean": "kor"
         }

+    def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
+        progress(0.3, desc="Processing input...")
+        try:
+            inputs = self.processor(text=text, src_lang=self.languages[src_lang], return_tensors="pt")
+            progress(0.6, desc="Generating audio...")
+            audio_array = self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0].cpu().numpy().squeeze()
+            progress(1.0, desc="Done!")
+            return (self.sample_rate, audio_array)
+        except Exception as e:
+            raise gr.Error(str(e))
+
+    def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
+        progress(0.3, desc="Loading audio...")
         try:
             audio, orig_freq = torchaudio.load(audio_path)
             audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16000)

+            progress(0.6, desc="Translating...")
             inputs = self.processor(audios=audio, return_tensors="pt")
+            audio_array = self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0].cpu().numpy().squeeze()
+            progress(1.0, desc="Done!")
+            return (self.sample_rate, audio_array)
         except Exception as e:
+            raise gr.Error(str(e))

 css = """
+#component-0 {
+    max-width: 1200px;
+    margin: auto;
     padding: 20px;
 }

+.container {
+    border-radius: 12px;
+    padding: 20px;
 }

+.gr-form {
+    border-color: #e5e7eb !important;
 }

+.gr-button {
+    border-radius: 8px !important;
+    background: linear-gradient(to right, #2563eb, #4f46e5) !important;
+    color: white !important;
+    font-weight: 600 !important;
 }

+.gr-button:hover {
+    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1) !important;
+    transform: translateY(-1px);
 }

+.gr-input, .gr-select {
+    border-radius: 8px !important;
 }

+.gr-panel {
+    border-radius: 12px !important;
 }

+.title {
+    text-align: center;
+    font-size: 2.5rem;
+    font-weight: bold;
+    margin: 1rem 0;
+    background: linear-gradient(to right, #2563eb, #4f46e5);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
 }

+.subtitle {
+    text-align: center;
+    color: #6b7280;
+    margin-bottom: 2rem;
 }

+.tab-nav {
+    border-bottom: 2px solid #e5e7eb;
+    margin-bottom: 1rem;
 }

+.output-label {
+    font-weight: 600;
+    color: #374151;
+    margin-bottom: 0.5rem;
 }

+.footer {
+    text-align: center;
+    margin-top: 2rem;
+    padding-top: 1rem;
+    border-top: 1px solid #e5e7eb;
+    color: #6b7280;
+    font-size: 0.875rem;
 }
 """

+def create_ui():
+    translator = SeamlessTranslator()
+
+    with gr.Blocks(css=css, title="A.R.I.S. Translator") as demo:
+        gr.HTML(
+            """
+            <div class="title">A.R.I.S. Translator</div>
+            <div class="subtitle">Advanced Real-time Interpretation System</div>
+            """
         )
+
+        with gr.Tabs() as tabs:
+            # Text to Speech Tab
+            with gr.Tab("Text Translation", id=1):
+                with gr.Row():
+                    with gr.Column():
                         text_input = gr.Textbox(
+                            label="Text to Translate",
+                            placeholder="Enter your text here...",
+                            lines=5
                         )
                         with gr.Row():
+                            src_lang = gr.Dropdown(
+                                choices=list(translator.languages.keys()),
+                                value="English",
+                                label="Source Language"
                             )
+                            tgt_lang = gr.Dropdown(
+                                choices=list(translator.languages.keys()),
+                                value="Spanish",
+                                label="Target Language"
                             )
+                        translate_btn = gr.Button("Translate", variant="primary")
+
+                    with gr.Column():
+                        gr.HTML('<div class="output-label">Translation Output</div>')
+                        audio_output = gr.Audio(
+                            label="Translated Audio",
+                            type="numpy"
+                        )
+
+            # Audio to Speech Tab
+            with gr.Tab("Audio Translation", id=2):
+                with gr.Row():
+                    with gr.Column():
                         audio_input = gr.Audio(
+                            label="Upload Audio",
                             type="filepath"
                         )
                         tgt_lang_audio = gr.Dropdown(
+                            choices=list(translator.languages.keys()),
+                            value="English",
+                            label="Target Language"
                         )
+                        translate_audio_btn = gr.Button("Translate Audio", variant="primary")
+
+                    with gr.Column():
+                        gr.HTML('<div class="output-label">Translation Output</div>')
+                        audio_output_from_audio = gr.Audio(
+                            label="Translated Audio",
+                            type="numpy"
+                        )
+
+        gr.HTML(
+            """
+            <div class="footer">
+                Powered by Meta's SeamlessM4T model | Built with Gradio
+            </div>
+            """
+        )
+
+        # Event handlers
+        translate_btn.click(
+            fn=translator.translate_text,
+            inputs=[text_input, src_lang, tgt_lang],
+            outputs=audio_output
+        )
+
+        translate_audio_btn.click(
+            fn=translator.translate_audio,
+            inputs=[audio_input, tgt_lang_audio],
+            outputs=audio_output_from_audio
+        )

     return demo

 if __name__ == "__main__":
+    demo = create_ui()
     demo.queue()
     demo.launch()
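
For reference, the model calls that the new SeamlessTranslator wraps can also be run directly with transformers, outside the Gradio UI. The snippet below is a minimal sketch of the translate_text path (English text in, Spanish speech out); it assumes the facebook/seamless-m4t-v2-large weights can be downloaded, and the output file name "hello_es.wav" is illustrative, not part of this commit.

# Minimal sketch of the translate_text path, without the Gradio layer.
# Assumes the facebook/seamless-m4t-v2-large checkpoint is available;
# "hello_es.wav" is an illustrative output path, not defined in this repo.
import torch
import torchaudio
from transformers import AutoProcessor, SeamlessM4Tv2Model

model_name = "facebook/seamless-m4t-v2-large"
processor = AutoProcessor.from_pretrained(model_name)
model = SeamlessM4Tv2Model.from_pretrained(model_name)

# Same call chain the app uses: tokenize English text, generate Spanish speech.
inputs = processor(text="Hello, how are you?", src_lang="eng", return_tensors="pt")
audio = model.generate(**inputs, tgt_lang="spa")[0].cpu().numpy().squeeze()

# Save the waveform at the model's configured sampling rate.
torchaudio.save("hello_es.wav", torch.from_numpy(audio).unsqueeze(0), model.config.sampling_rate)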