# Hugging Face Space (status: Running)
import gradio as gr | |
import torch | |
import torchaudio | |
import numpy as np | |
from transformers import AutoProcessor, SeamlessM4Tv2Model | |
from datetime import datetime | |
class SeamlessTranslator:
    """Translate text or speech into spoken audio with SeamlessM4T v2.

    The processor and model are loaded once in ``__init__``; the two public
    methods are written to be used directly as Gradio event handlers and
    report progress through a ``gr.Progress`` tracker.
    """

    # Rate (Hz) that uploaded audio is resampled to before it is handed to the
    # processor; the same value is passed along as ``sampling_rate``.
    INPUT_SAMPLE_RATE = 16_000

    def __init__(self):
        """Load the pretrained processor/model and build the language table."""
        self.model_name = "facebook/seamless-m4t-v2-large"
        print("Loading model...")
        self.processor = AutoProcessor.from_pretrained(self.model_name)
        self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
        # Returned alongside every generated waveform as its sampling rate.
        self.sample_rate = self.model.config.sampling_rate
        # Display label shown in the UI -> language code passed to the model.
        self.languages = {
            "๐บ๐ธ English": "eng",
            "๐ช๐ธ Spanish": "spa",
            "๐ซ๐ท French": "fra",
            "๐ฉ๐ช German": "deu",
            "๐ฎ๐น Italian": "ita",
            "๐ต๐น Portuguese": "por",
            "๐ท๐บ Russian": "rus",
            "๐จ๐ณ Chinese": "cmn",
            "๐ฏ๐ต Japanese": "jpn",
            "๐ฐ๐ท Korean": "kor",
        }

    def _generate_speech(self, inputs, tgt_lang):
        """Run generation for *inputs* and return the waveform as a NumPy array.

        ``tgt_lang`` is a display label (a key of ``self.languages``); the
        first element of the ``generate`` output is moved to CPU, converted to
        NumPy and squeezed.
        """
        target_code = self.languages[tgt_lang]
        generated = self.model.generate(**inputs, tgt_lang=target_code)
        return generated[0].cpu().numpy().squeeze()

    def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
        """Translate *text* from ``src_lang`` into speech in ``tgt_lang``.

        Args:
            text: The text to translate.
            src_lang: Display label of the source language.
            tgt_lang: Display label of the target language.
            progress: Gradio progress tracker.

        Returns:
            ``((sample_rate, waveform), status_message)``.

        Raises:
            gr.Error: If *text* is empty/blank or any step of the translation
                fails (including an unknown language label).
        """
        # Reject blank input up front, mirroring the None check in
        # translate_audio, instead of sending an empty prompt to the model.
        if text is None or not text.strip():
            raise gr.Error("Please enter some text to translate")

        progress(0.3, desc="Processing input...")
        try:
            inputs = self.processor(
                text=text,
                src_lang=self.languages[src_lang],
                return_tensors="pt",
            )
            progress(0.6, desc="Generating audio...")
            audio_array = self._generate_speech(inputs, tgt_lang)
        except Exception as e:
            # Surface the failure in the UI while keeping the original cause
            # attached to the traceback.
            raise gr.Error(f"โ Translation failed: {e}") from e

        progress(1.0, desc="Done!")
        return (
            (self.sample_rate, audio_array),
            f"โ Translation completed: {src_lang} โ {tgt_lang}",
        )

    def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
        """Translate the audio file at *audio_path* into speech in ``tgt_lang``.

        Args:
            audio_path: Path of the uploaded audio file, or ``None`` when
                nothing was uploaded.
            tgt_lang: Display label of the target language.
            progress: Gradio progress tracker.

        Returns:
            ``((sample_rate, waveform), status_message)``.

        Raises:
            gr.Error: If no file was provided or loading/translation fails.
        """
        if audio_path is None:
            raise gr.Error("โ Please upload an audio file")

        progress(0.3, desc="Loading audio...")
        try:
            audio, orig_freq = torchaudio.load(audio_path)
            audio = torchaudio.functional.resample(
                audio,
                orig_freq=orig_freq,
                new_freq=self.INPUT_SAMPLE_RATE,
            )
            progress(0.6, desc="Translating...")
            # State the rate explicitly so the feature extractor can validate
            # it rather than assuming the waveform is already correct.
            inputs = self.processor(
                audios=audio,
                sampling_rate=self.INPUT_SAMPLE_RATE,
                return_tensors="pt",
            )
            audio_array = self._generate_speech(inputs, tgt_lang)
        except Exception as e:
            raise gr.Error(f"โ Translation failed: {e}") from e

        progress(1.0, desc="Done!")
        return (self.sample_rate, audio_array), "โ Audio translation completed"
# Custom stylesheet passed to gr.Blocks(css=...); the class names below are
# referenced by the HTML snippets and elem_classes used when building the UI.
css = """
#component-0 {
    max-width: 1200px;
    margin: auto;
    padding: 20px;
}
.container {
    border-radius: 16px;
    padding: 24px;
    background: white;
    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
}
.header-container {
    text-align: center;
    margin-bottom: 2rem;
    padding: 2rem;
    background: linear-gradient(135deg, #f8fafc, #e2e8f0);
    border-radius: 16px;
    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
}
.title {
    font-size: 2.75rem;
    font-weight: 800;
    background: linear-gradient(135deg, #1e40af, #3b82f6);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 0.5rem;
}
.subtitle {
    color: #64748b;
    font-size: 1.1rem;
    margin-bottom: 1rem;
}
.stats {
    display: flex;
    justify-content: center;
    gap: 2rem;
    margin-top: 1rem;
}
.stat-item {
    text-align: center;
    padding: 0.5rem 1rem;
    background: white;
    border-radius: 8px;
    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.stat-value {
    font-weight: 600;
    color: #1e40af;
    font-size: 1.25rem;
}
.stat-label {
    color: #64748b;
    font-size: 0.875rem;
}
.tab-nav {
    background: white;
    padding: 0.5rem;
    border-radius: 12px;
    margin-bottom: 1.5rem;
    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.gr-form {
    border: none !important;
    background: white;
    padding: 1.5rem !important;
    border-radius: 12px !important;
    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.gr-box {
    border-radius: 12px !important;
    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.gr-button {
    border-radius: 8px !important;
    background: linear-gradient(135deg, #1e40af, #3b82f6) !important;
    color: white !important;
    font-weight: 600 !important;
    padding: 0.75rem 1.5rem !important;
    transition: all 0.2s !important;
}
.gr-button:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1) !important;
}
.gr-input, .gr-select {
    border-radius: 8px !important;
    border: 1px solid #e2e8f0 !important;
    padding: 0.75rem !important;
    transition: all 0.2s !important;
}
.gr-input:focus, .gr-select:focus {
    border-color: #3b82f6 !important;
    box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1) !important;
}
.gr-panel {
    border-radius: 12px !important;
    border: none !important;
    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1) !important;
}
.output-container {
    background: #f8fafc;
    padding: 1.5rem;
    border-radius: 12px;
    margin-top: 1rem;
}
.output-label {
    font-weight: 600;
    color: #1e40af;
    margin-bottom: 0.75rem;
    font-size: 1.1rem;
}
.status-message {
    padding: 0.75rem;
    border-radius: 8px;
    margin-top: 1rem;
    font-size: 0.875rem;
    background: #f0f9ff;
    border-left: 4px solid #3b82f6;
}
.footer {
    text-align: center;
    margin-top: 3rem;
    padding: 1.5rem;
    background: white;
    border-radius: 12px;
    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.footer-text {
    color: #64748b;
    font-size: 0.875rem;
}
.feature-grid {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: 1rem;
    margin: 1rem 0;
}
.feature-item {
    background: white;
    padding: 1rem;
    border-radius: 8px;
    text-align: center;
    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.feature-icon {
    font-size: 1.5rem;
    margin-bottom: 0.5rem;
    color: #3b82f6;
}
.language-pair {
    display: flex;
    align-items: center;
    gap: 1rem;
    margin: 1rem 0;
    padding: 0.75rem;
    background: #f8fafc;
    border-radius: 8px;
}
.language-arrow {
    color: #3b82f6;
    font-weight: bold;
}
"""
def _output_panel():
    """Render the styled "Translation Output" panel and return its Audio player."""
    # gr.Box is not available in every Gradio release; use gr.Group as the
    # container when it is missing so the layout still builds.
    container = getattr(gr, "Box", None) or gr.Group
    with container(elem_classes="output-container"):
        gr.HTML('<div class="output-label">๐ Translation Output</div>')
        return gr.Audio(label="Translated Audio", type="numpy")


def create_ui():
    """Build the translator Gradio app and return the ``gr.Blocks`` instance.

    Instantiates ``SeamlessTranslator`` (which loads the model), lays out a
    header, a text-translation tab, an audio-translation tab, a feature grid
    and a footer, and wires both translate buttons to the translator methods.
    """
    translator = SeamlessTranslator()
    language_names = list(translator.languages.keys())
    # Shown in the header and feature grid; derived from the language table so
    # the displayed count cannot drift from the dropdown choices.
    language_count = len(language_names)

    with gr.Blocks(css=css, title="A.R.I.S. Translator") as demo:
        # Header
        # NOTE: the keyword is ``elem_classes``; ``elem_class`` is not a
        # Gradio parameter, so the header styling was never applied.
        with gr.Column(elem_classes="header-container"):
            gr.HTML(
                f"""
                <div class="title">A.R.I.S. Translator</div>
                <div class="subtitle">Advanced Real-time Interpretation System</div>
                <div class="stats">
                <div class="stat-item">
                <div class="stat-value">{language_count}</div>
                <div class="stat-label">Languages</div>
                </div>
                <div class="stat-item">
                <div class="stat-value">Real-time</div>
                <div class="stat-label">Translation</div>
                </div>
                <div class="stat-item">
                <div class="stat-value">Neural</div>
                <div class="stat-label">Technology</div>
                </div>
                </div>
                """
            )

        # Main content
        with gr.Tabs():
            # Text Translation Tab
            with gr.Tab("๐ค Text Translation", id=1):
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Text to Translate",
                            placeholder="Enter your text here...",
                            lines=5,
                            elem_classes="gr-input",
                        )
                        with gr.Row():
                            src_lang = gr.Dropdown(
                                choices=language_names,
                                value="๐บ๐ธ English",
                                label="Source Language",
                                elem_classes="gr-select",
                            )
                            tgt_lang = gr.Dropdown(
                                choices=language_names,
                                value="๐ช๐ธ Spanish",
                                label="Target Language",
                                elem_classes="gr-select",
                            )
                        translate_btn = gr.Button("๐ Translate", variant="primary")
                        status_text = gr.Textbox(
                            label="Status",
                            interactive=False,
                            elem_classes="status-message",
                        )
                    with gr.Column():
                        audio_output = _output_panel()

            # Audio Translation Tab
            with gr.Tab("๐ค Audio Translation", id=2):
                with gr.Row():
                    with gr.Column():
                        audio_input = gr.Audio(
                            label="Upload Audio",
                            type="filepath",
                        )
                        tgt_lang_audio = gr.Dropdown(
                            choices=language_names,
                            value="๐บ๐ธ English",
                            label="Target Language",
                            elem_classes="gr-select",
                        )
                        translate_audio_btn = gr.Button(
                            "๐ Translate Audio", variant="primary"
                        )
                        status_text_audio = gr.Textbox(
                            label="Status",
                            interactive=False,
                            elem_classes="status-message",
                        )
                    with gr.Column():
                        audio_output_from_audio = _output_panel()

        # Features Grid
        gr.HTML(
            f"""
            <div class="feature-grid">
            <div class="feature-item">
            <div class="feature-icon">๐</div>
            <div>{language_count} Languages</div>
            </div>
            <div class="feature-item">
            <div class="feature-icon">โก</div>
            <div>Real-time Processing</div>
            </div>
            <div class="feature-item">
            <div class="feature-icon">๐ฏ</div>
            <div>High Accuracy</div>
            </div>
            </div>
            """
        )

        # Footer
        gr.HTML(
            """
            <div class="footer">
            <div class="footer-text">
            Powered by Meta's SeamlessM4T model | Built with โค๏ธ using Gradio
            </div>
            </div>
            """
        )

        # Event handlers (must be registered inside the Blocks context)
        translate_btn.click(
            fn=translator.translate_text,
            inputs=[text_input, src_lang, tgt_lang],
            outputs=[audio_output, status_text],
        )
        translate_audio_btn.click(
            fn=translator.translate_audio,
            inputs=[audio_input, tgt_lang_audio],
            outputs=[audio_output_from_audio, status_text_audio],
        )

    return demo
if __name__ == "__main__":
    # Script entry point: build the interface, enable Gradio's queue, and
    # start serving.
    demo = create_ui()
    demo.queue().launch()