Spaces:
Running
on
L40S
Running
on
L40S
“siddhu001”
committed on
Commit
·
71fd664
1
Parent(s):
6ebbb2b
Add input text box
Browse files
app.py
CHANGED
@@ -283,6 +283,7 @@ def start_warmup():
|
|
283 |
except Exception:
|
284 |
print("Removing " + opt + " from ASR options since it cannot be loaded.")
|
285 |
ASR_options = ASR_options[:opt_count] + ASR_options[(opt_count + 1) :]
|
|
|
286 |
if opt == ASR_name:
|
287 |
ASR_name = ASR_options[0]
|
288 |
for opt_count in range(len(LLM_options)):
|
@@ -345,6 +346,7 @@ def transcribe(
|
|
345 |
ASR_option: str,
|
346 |
LLM_option: str,
|
347 |
type_option: str,
|
|
|
348 |
):
|
349 |
"""
|
350 |
Processes and transcribes an audio stream in real-time.
|
@@ -420,6 +422,15 @@ def transcribe(
|
|
420 |
audio_output1 = None
|
421 |
else:
|
422 |
stream = np.concatenate((stream, y))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
(
|
424 |
asr_output_str,
|
425 |
text_str,
|
@@ -512,6 +523,13 @@ with gr.Blocks(
|
|
512 |
(https://github.com/siddhu001/espnet/tree/sds_demo_recipe/egs2/TEMPLATE/sds1#how-to-use).
|
513 |
"""
|
514 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
515 |
with gr.Row():
|
516 |
with gr.Column(scale=1):
|
517 |
user_audio = gr.Audio(
|
@@ -519,6 +537,12 @@ with gr.Blocks(
|
|
519 |
streaming=True,
|
520 |
waveform_options=gr.WaveformOptions(sample_rate=16000),
|
521 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
522 |
with gr.Row():
|
523 |
type_radio = gr.Radio(
|
524 |
choices=["Cascaded", "E2E"],
|
@@ -686,7 +710,7 @@ with gr.Blocks(
|
|
686 |
)
|
687 |
user_audio.stream(
|
688 |
transcribe,
|
689 |
-
inputs=[state, user_audio, radio, ASR_radio, LLM_radio, type_radio],
|
690 |
outputs=[state, output_asr_text, output_text, output_audio, output_audio1],
|
691 |
).then(
|
692 |
lambda *args: callback.flag(list(args)), [user_audio], None, preprocess=False
|
|
|
283 |
except Exception:
|
284 |
print("Removing " + opt + " from ASR options since it cannot be loaded.")
|
285 |
ASR_options = ASR_options[:opt_count] + ASR_options[(opt_count + 1) :]
|
286 |
+
opt_count -=1
|
287 |
if opt == ASR_name:
|
288 |
ASR_name = ASR_options[0]
|
289 |
for opt_count in range(len(LLM_options)):
|
|
|
346 |
ASR_option: str,
|
347 |
LLM_option: str,
|
348 |
type_option: str,
|
349 |
+
input_text: str,
|
350 |
):
|
351 |
"""
|
352 |
Processes and transcribes an audio stream in real-time.
|
|
|
422 |
audio_output1 = None
|
423 |
else:
|
424 |
stream = np.concatenate((stream, y))
|
425 |
+
# import pdb;pdb.set_trace()
|
426 |
+
dialogue_model.chat.init_chat(
|
427 |
+
{
|
428 |
+
"role": "system",
|
429 |
+
"content": (
|
430 |
+
input_text
|
431 |
+
),
|
432 |
+
}
|
433 |
+
)
|
434 |
(
|
435 |
asr_output_str,
|
436 |
text_str,
|
|
|
523 |
(https://github.com/siddhu001/espnet/tree/sds_demo_recipe/egs2/TEMPLATE/sds1#how-to-use).
|
524 |
"""
|
525 |
)
|
526 |
+
default_instruct=(
|
527 |
+
"You are a helpful and friendly AI "
|
528 |
+
"assistant. "
|
529 |
+
"You are polite, respectful, and aim to "
|
530 |
+
"provide concise and complete responses of "
|
531 |
+
"less than 15 words."
|
532 |
+
)
|
533 |
with gr.Row():
|
534 |
with gr.Column(scale=1):
|
535 |
user_audio = gr.Audio(
|
|
|
537 |
streaming=True,
|
538 |
waveform_options=gr.WaveformOptions(sample_rate=16000),
|
539 |
)
|
540 |
+
input_text=gr.Textbox(
|
541 |
+
label="LLM prompt",
|
542 |
+
visible=True,
|
543 |
+
interactive=True,
|
544 |
+
value=default_instruct
|
545 |
+
)
|
546 |
with gr.Row():
|
547 |
type_radio = gr.Radio(
|
548 |
choices=["Cascaded", "E2E"],
|
|
|
710 |
)
|
711 |
user_audio.stream(
|
712 |
transcribe,
|
713 |
+
inputs=[state, user_audio, radio, ASR_radio, LLM_radio, type_radio, input_text],
|
714 |
outputs=[state, output_asr_text, output_text, output_audio, output_audio1],
|
715 |
).then(
|
716 |
lambda *args: callback.flag(list(args)), [user_audio], None, preprocess=False
|