# Web file-viewer residue captured with the source (not part of the program):
#   DHEIVER's picture
#   Update app.py
#   7f1ced9 verified
#   raw
#   history blame
#   11.9 kB
import gradio as gr
import torch
import torchaudio
import numpy as np
from transformers import AutoProcessor, SeamlessM4Tv2Model
from datetime import datetime
class SeamlessTranslator:
    """Translate text or speech into speech with the SeamlessM4T v2 model.

    The processor and model are loaded once in ``__init__``.  Both public
    methods are written as Gradio event handlers: they return an
    ``((sample_rate, waveform), status_message)`` pair and report failures
    by raising ``gr.Error``.
    """

    # Rate (Hz) uploaded audio is resampled to before it reaches the processor.
    INPUT_SAMPLE_RATE = 16000

    def __init__(self):
        """Load the processor and model and build the language table."""
        self.model_name = "facebook/seamless-m4t-v2-large"
        print("Loading model...")
        self.processor = AutoProcessor.from_pretrained(self.model_name)
        self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
        # Sampling rate of the generated audio, as declared by the model config.
        self.sample_rate = self.model.config.sampling_rate
        # UI label -> language code passed to the processor / model.
        self.languages = {
            "๐Ÿ‡บ๐Ÿ‡ธ English": "eng",
            "๐Ÿ‡ช๐Ÿ‡ธ Spanish": "spa",
            "๐Ÿ‡ซ๐Ÿ‡ท French": "fra",
            "๐Ÿ‡ฉ๐Ÿ‡ช German": "deu",
            "๐Ÿ‡ฎ๐Ÿ‡น Italian": "ita",
            "๐Ÿ‡ต๐Ÿ‡น Portuguese": "por",
            "๐Ÿ‡ท๐Ÿ‡บ Russian": "rus",
            "๐Ÿ‡จ๐Ÿ‡ณ Chinese": "cmn",
            "๐Ÿ‡ฏ๐Ÿ‡ต Japanese": "jpn",
            "๐Ÿ‡ฐ๐Ÿ‡ท Korean": "kor"
        }

    def _language_code(self, label):
        """Return the language code for a UI label, or raise ``gr.Error``."""
        try:
            return self.languages[label]
        except (KeyError, TypeError):
            # TypeError covers unhashable values such as a list from the UI.
            raise gr.Error(f"โŒ Unsupported language: {label}") from None

    def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
        """Translate ``text`` from ``src_lang`` into speech in ``tgt_lang``.

        Args:
            text: Text to translate; must contain non-whitespace characters.
            src_lang: Source-language label (a key of ``self.languages``).
            tgt_lang: Target-language label (a key of ``self.languages``).
            progress: Gradio progress callback.

        Returns:
            ``((sample_rate, waveform), status_message)``.

        Raises:
            gr.Error: On empty text, an unknown language label, or any
                failure in the processor / model.
        """
        # Validate up front (mirrors the missing-file check in translate_audio)
        # so the user gets a specific message instead of a wrapped model error.
        if not text or not text.strip():
            raise gr.Error("โŒ Please enter some text to translate")
        src_code = self._language_code(src_lang)
        tgt_code = self._language_code(tgt_lang)

        progress(0.3, desc="Processing input...")
        try:
            inputs = self.processor(text=text, src_lang=src_code, return_tensors="pt")
            progress(0.6, desc="Generating audio...")
            # First element of generate()'s output is used as the waveform.
            audio_array = self.model.generate(**inputs, tgt_lang=tgt_code)[0].cpu().numpy().squeeze()
            progress(1.0, desc="Done!")
            return (self.sample_rate, audio_array), f"โœ… Translation completed: {src_lang} โ†’ {tgt_lang}"
        except Exception as e:
            # Keep the original exception chained for server-side debugging.
            raise gr.Error(f"โŒ Translation failed: {str(e)}") from e

    def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
        """Translate the speech in an audio file into speech in ``tgt_lang``.

        Args:
            audio_path: Path of the uploaded audio file, or ``None``.
            tgt_lang: Target-language label (a key of ``self.languages``).
            progress: Gradio progress callback.

        Returns:
            ``((sample_rate, waveform), status_message)``.

        Raises:
            gr.Error: On a missing file, an unknown language label, or any
                failure while loading, processing or generating audio.
        """
        if audio_path is None:
            raise gr.Error("โŒ Please upload an audio file")
        tgt_code = self._language_code(tgt_lang)

        progress(0.3, desc="Loading audio...")
        try:
            audio, orig_freq = torchaudio.load(audio_path)
            audio = torchaudio.functional.resample(
                audio, orig_freq=orig_freq, new_freq=self.INPUT_SAMPLE_RATE
            )
            progress(0.6, desc="Translating...")
            # Declare the rate explicitly so the processor can validate it.
            inputs = self.processor(
                audios=audio,
                sampling_rate=self.INPUT_SAMPLE_RATE,
                return_tensors="pt",
            )
            audio_array = self.model.generate(**inputs, tgt_lang=tgt_code)[0].cpu().numpy().squeeze()
            progress(1.0, desc="Done!")
            return (self.sample_rate, audio_array), "โœ… Audio translation completed"
        except Exception as e:
            # Keep the original exception chained for server-side debugging.
            raise gr.Error(f"โŒ Translation failed: {str(e)}") from e
# Custom stylesheet handed to gr.Blocks(css=css) in create_ui().
# It styles the class names used by that function's gr.HTML snippets and
# elem_classes arguments (header-container, title, subtitle, stats, stat-*,
# gr-input, gr-select, status-message, output-container, output-label,
# feature-*, footer, footer-text).
# NOTE(review): some selectors (#component-0, .container, .tab-nav, .gr-form,
# .gr-box, .gr-button, .gr-panel, .language-pair, .language-arrow) are not
# referenced by markup in this file; presumably they target Gradio's own
# generated DOM -- verify against the Gradio version in use.
css = """
#component-0 {
max-width: 1200px;
margin: auto;
padding: 20px;
}
.container {
border-radius: 16px;
padding: 24px;
background: white;
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
}
.header-container {
text-align: center;
margin-bottom: 2rem;
padding: 2rem;
background: linear-gradient(135deg, #f8fafc, #e2e8f0);
border-radius: 16px;
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
}
.title {
font-size: 2.75rem;
font-weight: 800;
background: linear-gradient(135deg, #1e40af, #3b82f6);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 0.5rem;
}
.subtitle {
color: #64748b;
font-size: 1.1rem;
margin-bottom: 1rem;
}
.stats {
display: flex;
justify-content: center;
gap: 2rem;
margin-top: 1rem;
}
.stat-item {
text-align: center;
padding: 0.5rem 1rem;
background: white;
border-radius: 8px;
box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.stat-value {
font-weight: 600;
color: #1e40af;
font-size: 1.25rem;
}
.stat-label {
color: #64748b;
font-size: 0.875rem;
}
.tab-nav {
background: white;
padding: 0.5rem;
border-radius: 12px;
margin-bottom: 1.5rem;
box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.gr-form {
border: none !important;
background: white;
padding: 1.5rem !important;
border-radius: 12px !important;
box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.gr-box {
border-radius: 12px !important;
box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.gr-button {
border-radius: 8px !important;
background: linear-gradient(135deg, #1e40af, #3b82f6) !important;
color: white !important;
font-weight: 600 !important;
padding: 0.75rem 1.5rem !important;
transition: all 0.2s !important;
}
.gr-button:hover {
transform: translateY(-1px) !important;
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1) !important;
}
.gr-input, .gr-select {
border-radius: 8px !important;
border: 1px solid #e2e8f0 !important;
padding: 0.75rem !important;
transition: all 0.2s !important;
}
.gr-input:focus, .gr-select:focus {
border-color: #3b82f6 !important;
box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1) !important;
}
.gr-panel {
border-radius: 12px !important;
border: none !important;
box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1) !important;
}
.output-container {
background: #f8fafc;
padding: 1.5rem;
border-radius: 12px;
margin-top: 1rem;
}
.output-label {
font-weight: 600;
color: #1e40af;
margin-bottom: 0.75rem;
font-size: 1.1rem;
}
.status-message {
padding: 0.75rem;
border-radius: 8px;
margin-top: 1rem;
font-size: 0.875rem;
background: #f0f9ff;
border-left: 4px solid #3b82f6;
}
.footer {
text-align: center;
margin-top: 3rem;
padding: 1.5rem;
background: white;
border-radius: 12px;
box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.footer-text {
color: #64748b;
font-size: 0.875rem;
}
.feature-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 1rem;
margin: 1rem 0;
}
.feature-item {
background: white;
padding: 1rem;
border-radius: 8px;
text-align: center;
box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
}
.feature-icon {
font-size: 1.5rem;
margin-bottom: 0.5rem;
color: #3b82f6;
}
.language-pair {
display: flex;
align-items: center;
gap: 1rem;
margin: 1rem 0;
padding: 0.75rem;
background: #f8fafc;
border-radius: 8px;
}
.language-arrow {
color: #3b82f6;
font-weight: bold;
}
"""
def create_ui():
    """Build the Gradio Blocks interface for the translator.

    Instantiates ``SeamlessTranslator`` (which loads the model), lays out a
    header, a text-translation tab, an audio-translation tab, a feature grid
    and a footer, and wires the two buttons to the translator's methods.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    translator = SeamlessTranslator()
    # All three dropdowns offer the same labels; compute them once.
    language_choices = list(translator.languages)
    # gr.Box does not exist in Gradio 4+; use gr.Group when it is missing.
    output_box = getattr(gr, "Box", gr.Group)

    def output_panel():
        """Create the styled output box and return its audio player."""
        with output_box(elem_classes="output-container"):
            gr.HTML('<div class="output-label">๐Ÿ”Š Translation Output</div>')
            return gr.Audio(
                label="Translated Audio",
                type="numpy"
            )

    with gr.Blocks(css=css, title="A.R.I.S. Translator") as demo:
        # Header
        # The keyword is `elem_classes`; the misspelt `elem_class` never
        # attached the .header-container style to this column.
        with gr.Column(elem_classes="header-container"):
            gr.HTML(
                """
<div class="title">A.R.I.S. Translator</div>
<div class="subtitle">Advanced Real-time Interpretation System</div>
<div class="stats">
<div class="stat-item">
<div class="stat-value">10</div>
<div class="stat-label">Languages</div>
</div>
<div class="stat-item">
<div class="stat-value">Real-time</div>
<div class="stat-label">Translation</div>
</div>
<div class="stat-item">
<div class="stat-value">Neural</div>
<div class="stat-label">Technology</div>
</div>
</div>
"""
            )
        # Main content
        with gr.Tabs() as tabs:
            # Text Translation Tab
            with gr.Tab("๐Ÿ”ค Text Translation", id=1):
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Text to Translate",
                            placeholder="Enter your text here...",
                            lines=5,
                            elem_classes="gr-input"
                        )
                        with gr.Row():
                            src_lang = gr.Dropdown(
                                choices=language_choices,
                                value="๐Ÿ‡บ๐Ÿ‡ธ English",
                                label="Source Language",
                                elem_classes="gr-select"
                            )
                            tgt_lang = gr.Dropdown(
                                choices=language_choices,
                                value="๐Ÿ‡ช๐Ÿ‡ธ Spanish",
                                label="Target Language",
                                elem_classes="gr-select"
                            )
                        translate_btn = gr.Button("๐Ÿ”„ Translate", variant="primary")
                        status_text = gr.Textbox(
                            label="Status",
                            interactive=False,
                            elem_classes="status-message"
                        )
                    with gr.Column():
                        audio_output = output_panel()
            # Audio Translation Tab
            with gr.Tab("๐ŸŽค Audio Translation", id=2):
                with gr.Row():
                    with gr.Column():
                        audio_input = gr.Audio(
                            label="Upload Audio",
                            type="filepath"
                        )
                        tgt_lang_audio = gr.Dropdown(
                            choices=language_choices,
                            value="๐Ÿ‡บ๐Ÿ‡ธ English",
                            label="Target Language",
                            elem_classes="gr-select"
                        )
                        translate_audio_btn = gr.Button("๐Ÿ”„ Translate Audio", variant="primary")
                        status_text_audio = gr.Textbox(
                            label="Status",
                            interactive=False,
                            elem_classes="status-message"
                        )
                    with gr.Column():
                        audio_output_from_audio = output_panel()
        # Features Grid
        gr.HTML(
            """
<div class="feature-grid">
<div class="feature-item">
<div class="feature-icon">๐ŸŒ</div>
<div>10 Languages</div>
</div>
<div class="feature-item">
<div class="feature-icon">โšก</div>
<div>Real-time Processing</div>
</div>
<div class="feature-item">
<div class="feature-icon">๐ŸŽฏ</div>
<div>High Accuracy</div>
</div>
</div>
"""
        )
        # Footer
        gr.HTML(
            """
<div class="footer">
<div class="footer-text">
Powered by Meta's SeamlessM4T model | Built with โค๏ธ using Gradio
</div>
</div>
"""
        )
        # Event handlers: each button returns (audio, status) to its own tab.
        translate_btn.click(
            fn=translator.translate_text,
            inputs=[text_input, src_lang, tgt_lang],
            outputs=[audio_output, status_text]
        )
        translate_audio_btn.click(
            fn=translator.translate_audio,
            inputs=[audio_input, tgt_lang_audio],
            outputs=[audio_output_from_audio, status_text_audio]
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the interface (this also loads the model).
    demo = create_ui()
    # Turn on Gradio's request queue, then start serving the app.
    demo.queue()
    demo.launch()