gpt-99 committed on
Commit
9913174
·
1 Parent(s): dbbe3fe

static translator for now

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. .gradio/certificate.pem +31 -0
  3. app.py +303 -0
  4. requirements.txt +73 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv/
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
app.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sounddevice as sd
2
+ import soundfile as sf
3
+ import torch
4
+ import numpy as np
5
+ import torchaudio
6
+ import time
7
+ import gradio as gr
8
+ from concurrent.futures import ThreadPoolExecutor
9
+ from transformers import AutoProcessor, AutoModel
10
+ from queue import Queue, Empty
11
+ import warnings
12
+ import traceback
13
+ import whisper
14
+ import gc
15
+
16
+ warnings.filterwarnings("ignore")
17
+
18
class OptimizedContinuousTranslator:
    """Translate recorded speech into text in a target language.

    Wraps the ``facebook/seamless-m4t-v2-large`` processor/model pair. If the
    checkpoint cannot be loaded, ``processor`` and ``model`` are set to ``None``
    and ``translate_audio`` returns an empty string instead of raising.
    """

    # Checkpoint used for both the processor and the model.
    MODEL_NAME = "facebook/seamless-m4t-v2-large"
    # Rate (Hz) every recording is resampled to before it reaches the processor.
    MODEL_SAMPLE_RATE = 16000

    def __init__(self, target_language="spa", chunk_duration=3, sample_rate=16000):
        """Load the processor and model and place the model on the best device.

        Args:
            target_language (str): Language code passed to ``generate`` as
                ``tgt_lang``.
            chunk_duration (int): Stored on the instance; not used by the
                methods of this class.
            sample_rate (int): Stored on the instance; not used by the
                methods of this class (audio is always resampled to
                ``MODEL_SAMPLE_RATE``).
        """
        # Plain configuration is assigned first so these attributes exist even
        # when loading the checkpoint fails below.
        self.target_language = target_language
        self.chunk_duration = chunk_duration
        self.sample_rate = sample_rate
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        try:
            self.processor = AutoProcessor.from_pretrained(self.MODEL_NAME)
            # translate_audio() moves its inputs to self.device, so the weights
            # have to live on the same device or generate() fails on CUDA hosts.
            self.model = AutoModel.from_pretrained(self.MODEL_NAME).to(self.device)
        except Exception as e:
            # Best effort: keep the object usable (translations come back empty).
            print(f"Error loading model: {e}")
            self.processor = None
            self.model = None

    def wav_to_tensor(self, file_path, sampling_rate):
        """Load an audio file and resample it to ``sampling_rate`` if needed.

        Args:
            file_path (str): Path to the audio file to load.
            sampling_rate (int): Desired sampling rate in Hz.

        Returns:
            tuple: ``(waveform, sampling_rate)`` where ``waveform`` is the
            tensor returned by ``torchaudio.load`` (resampled when the file's
            native rate differs) and ``sampling_rate`` is the requested rate.
        """
        waveform, sample_rate = torchaudio.load(file_path)
        # Only pay for resampling when the file is not already at the target rate.
        if sample_rate != sampling_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=sampling_rate)
            waveform = resampler(waveform)

        return waveform, sampling_rate

    def translate_audio(self, audio_file_path):
        """Translate the speech in an audio file into ``self.target_language``.

        Args:
            audio_file_path (str | None): Path to the recording to translate.

        Returns:
            str: The translated text, or ``""`` when no path was given, the
            model/processor is unavailable, or any step of the translation
            raised (the error and its traceback are printed).
        """
        print("REACHED")
        if audio_file_path is None or self.processor is None or self.model is None:
            print(f"{audio_file_path} {self.processor} {self.model}")
            return ""

        try:
            # Prepare audio inputs
            wavform, sample_rate = self.wav_to_tensor(audio_file_path, self.MODEL_SAMPLE_RATE)
            audio_inputs = self.processor(audios=wavform.unsqueeze(0), return_tensors="pt", sampling_rate=sample_rate)

            # Move tensor inputs to the device the model was placed on in __init__
            audio_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in audio_inputs.items()}

            # Text-only generation: speech synthesis is explicitly disabled
            output_tokens = self.model.generate(
                **audio_inputs,
                tgt_lang=self.target_language,
                generate_speech=False
            )

            # Decode the first entry of the first output back into text
            translated_text = self.processor.decode(
                output_tokens[0].tolist()[0],
                skip_special_tokens=True
            )
            print(translated_text)
            return translated_text
        except Exception as e:
            error_message = f"Translation error: {str(e)}"
            stack_trace = traceback.format_exc()
            print(f"{error_message}\n{stack_trace}")
            return ""
        finally:
            # Release cached GPU blocks and unreachable Python objects after
            # every attempt, successful or not.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
99
+
100
+
101
+ # web app
102
+ # simple translator (no real time)
103
def create_translator_interface():
    """Assemble the Gradio Blocks UI: record audio, pick a language, translate.

    Returns:
        The ``gr.Blocks`` app, ready to be launched by the caller.
    """
    # A single translator instance backs every click handled by this UI.
    engine = OptimizedContinuousTranslator()

    # Collapsible usage instructions shown at the top of the page.
    usage_markdown = """
        ## 🎙️ Audio Translator: How to Use

        <details>
        <summary>Click to view usage instructions</summary>

        ### 🌐 Translation Steps
        1. **Select Target Language**:
        - Choose the language you want to translate to from the dropdown menu

        2. **Record Audio**:
        - Click on the microphone icon in the audio input area
        - Record your audio clearly and concisely
        - Ensure minimal background noise for best results

        3. **Translate**:
        - After recording, click the "Translate" button
        - The translated text will appear in the transcript box below

        ### 💡 Tips
        - Speak clearly and at a moderate pace
        - Avoid complex or technical language for more accurate translations
        - The translation works best with shorter, simpler sentences
        - Maximum recommended recording time is around 30 seconds

        ### 🌍 Supported Languages
        - Input: Currently supports clear spoken language
        - Output: Any of the languages you choose from
        </details>
        """

    # Language code -> display name. The code is the dropdown's value and is
    # what gets handed to the translator as the target language.
    language_names = {
        "afr": "Afrikaans",
        "amh": "Amharic",
        "arb": "Modern Standard Arabic",
        "ary": "Moroccan Arabic",
        "arz": "Egyptian Arabic",
        "asm": "Assamese",
        "ast": "Asturian",
        "azj": "North Azerbaijani",
        "bel": "Belarusian",
        "ben": "Bengali",
        "bos": "Bosnian",
        "bul": "Bulgarian",
        "cat": "Catalan",
        "ceb": "Cebuano",
        "ces": "Czech",
        "ckb": "Central Kurdish",
        "cmn": "Mandarin Chinese",
        "cmn_Hant": "Mandarin Chinese (Traditional)",
        "cym": "Welsh",
        "dan": "Danish",
        "deu": "German",
        "ell": "Greek",
        "eng": "English",
        "est": "Estonian",
        "eus": "Basque",
        "fin": "Finnish",
        "fra": "French",
        "fuv": "Nigerian Fulfulde",
        "gaz": "West Central Oromo",
        "gle": "Irish",
        "glg": "Galician",
        "guj": "Gujarati",
        "heb": "Hebrew",
        "hin": "Hindi",
        "hrv": "Croatian",
        "hun": "Hungarian",
        "hye": "Armenian",
        "ibo": "Igbo",
        "ind": "Indonesian",
        "isl": "Icelandic",
        "ita": "Italian",
        "jav": "Javanese",
        "jpn": "Japanese",
        "kam": "Kamba",
        "kan": "Kannada",
        "kat": "Georgian",
        "kaz": "Kazakh",
        "kea": "Kabuverdianu",
        "khk": "Halh Mongolian",
        "khm": "Khmer",
        "kir": "Kyrgyz",
        "kor": "Korean",
        "lao": "Lao",
        "lit": "Lithuanian",
        "ltz": "Luxembourgish",
        "lug": "Ganda",
        "luo": "Luo",
        "lvs": "Standard Latvian",
        "mai": "Maithili",
        "mal": "Malayalam",
        "mar": "Marathi",
        "mkd": "Macedonian",
        "mlt": "Maltese",
        "mni": "Meitei",
        "mya": "Burmese",
        "nld": "Dutch",
        "nno": "Norwegian Nynorsk",
        "nob": "Norwegian Bokmål",
        "npi": "Nepali",
        "nya": "Nyanja",
        "oci": "Occitan",
        "ory": "Odia",
        "pan": "Punjabi",
        "pbt": "Southern Pashto",
        "pes": "Western Persian",
        "pol": "Polish",
        "por": "Portuguese",
        "ron": "Romanian",
        "rus": "Russian",
        "slk": "Slovak",
        "slv": "Slovenian",
        "sna": "Shona",
        "snd": "Sindhi",
        "som": "Somali",
        "spa": "Spanish",
        "srp": "Serbian",
        "swe": "Swedish",
        "swh": "Swahili",
        "tam": "Tamil",
        "tel": "Telugu",
        "tgk": "Tajik",
        "tgl": "Tagalog",
        "tha": "Thai",
        "tur": "Turkish",
        "ukr": "Ukrainian",
        "urd": "Urdu",
        "uzn": "Northern Uzbek",
        "vie": "Vietnamese",
        "xho": "Xhosa",
        "yor": "Yoruba",
        "yue": "Cantonese",
        "zlm": "Colloquial Malay",
        "zsm": "Standard Malay",
        "zul": "Zulu",
    }

    # (display name, code) pairs in dictionary order, as the dropdown expects.
    dropdown_choices = list(zip(language_names.values(), language_names.keys()))

    with gr.Blocks(title="Continuous Audio Translator") as demo:
        gr.Markdown(usage_markdown)

        # Target-language selector
        with gr.Row():
            language_picker = gr.Dropdown(
                choices=dropdown_choices,
                value="spa",  # default selection, given as a language code
                label="Target Language",
                scale=2
            )

        # Microphone recording, delivered to the handler as a file path
        recording = gr.Audio(label="Record Audio", sources="microphone", type="filepath")

        # Read-only box that receives the translation
        transcript = gr.Textbox(label="Full Transcript", lines=10, interactive=False)

        # Trigger button
        with gr.Row():
            translate_button = gr.Button("Translate")

        def handle_translation(audio_file, target_language):
            """Translate the recording at ``audio_file`` into ``target_language``.

            Returns the translated text, or a short user-facing message when no
            recording was supplied or the translation produced nothing.
            """
            if audio_file:
                # The shared translator reads its target language from this attribute.
                engine.target_language = target_language
                try:
                    result = engine.translate_audio(audio_file)
                except Exception as exc:
                    return f"Error: {str(exc)}"
                return result or "Translation failed."
            return "No audio file provided. Please record and try again."

        # Wire the button: (recording, language code) in, transcript text out.
        translate_button.click(
            fn=handle_translation,
            inputs=[recording, language_picker],
            outputs=transcript
        )

    return demo
291
+
292
+
293
def main():
    """Build the translator UI and launch it."""
    launch_options = {
        "share": False,
        "show_error": True,
        "debug": True,  # handy while developing
    }
    create_translator_interface().launch(**launch_options)


# Only start the app when this file is executed as a script.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.6.2.post1
4
+ certifi==2024.8.30
5
+ cffi==1.17.1
6
+ charset-normalizer==3.4.0
7
+ click==8.1.7
8
+ fastapi==0.115.5
9
+ ffmpy==0.4.0
10
+ filelock==3.16.1
11
+ fsspec==2024.10.0
12
+ gradio==5.7.0
13
+ gradio_client==1.5.0
14
+ h11==0.14.0
15
+ httpcore==1.0.7
16
+ httpx==0.27.2
17
+ huggingface-hub==0.26.2
18
+ idna==3.10
19
+ Jinja2==3.1.4
20
+ llvmlite==0.43.0
21
+ markdown-it-py==3.0.0
22
+ MarkupSafe==2.1.5
23
+ mdurl==0.1.2
24
+ more-itertools==10.5.0
25
+ mpmath==1.3.0
26
+ networkx==3.4.2
27
+ numba==0.60.0
28
+ numpy==2.0.2
29
+ openai-whisper==20240930
30
+ orjson==3.10.12
31
+ packaging==24.2
32
+ pandas==2.2.3
33
+ pillow==11.0.0
34
+ protobuf==5.29.0
35
+ pycparser==2.22
36
+ pydantic==2.10.2
37
+ pydantic_core==2.27.1
38
+ pydub==0.25.1
39
+ Pygments==2.18.0
40
+ python-dateutil==2.9.0.post0
41
+ python-multipart==0.0.12
42
+ pytz==2024.2
43
+ PyYAML==6.0.2
44
+ regex==2024.11.6
45
+ requests==2.32.3
46
+ rich==13.9.4
47
+ ruff==0.8.0
48
+ safehttpx==0.1.1
49
+ safetensors==0.4.5
50
+ semantic-version==2.10.0
51
+ sentencepiece==0.2.0
52
+ setuptools==75.6.0
53
+ shellingham==1.5.4
54
+ six==1.16.0
55
+ sniffio==1.3.1
56
+ sounddevice==0.5.1
57
+ soundfile==0.12.1
58
+ starlette==0.41.3
59
+ sympy==1.13.1
60
+ tiktoken==0.8.0
61
+ tokenizers==0.20.3
62
+ tomlkit==0.12.0
63
+ torch==2.5.1
64
+ torchaudio==2.5.1
65
+ tqdm==4.67.1
66
+ transformers==4.46.3
67
+ typer==0.13.1
68
+ typing_extensions==4.12.2
69
+ tzdata==2024.2
70
+ urllib3==2.2.3
71
+ uvicorn==0.32.1
72
+ websockets==12.0
73
+ whisper==1.1.10