siddhu001 committed
Commit 71fd664
Parent(s): 6ebbb2b

Add input text box

Files changed (1)
  1. app.py +25 -1
app.py CHANGED
@@ -283,6 +283,7 @@ def start_warmup():
         except Exception:
             print("Removing " + opt + " from ASR options since it cannot be loaded.")
             ASR_options = ASR_options[:opt_count] + ASR_options[(opt_count + 1) :]
+            opt_count -= 1
             if opt == ASR_name:
                 ASR_name = ASR_options[0]
     for opt_count in range(len(LLM_options)):
@@ -345,6 +346,7 @@ def transcribe(
     ASR_option: str,
     LLM_option: str,
     type_option: str,
+    input_text: str,
 ):
     """
     Processes and transcribes an audio stream in real-time.
@@ -420,6 +422,15 @@ def transcribe(
         audio_output1 = None
     else:
         stream = np.concatenate((stream, y))
+        # import pdb;pdb.set_trace()
+        dialogue_model.chat.init_chat(
+            {
+                "role": "system",
+                "content": (
+                    input_text
+                ),
+            }
+        )
         (
             asr_output_str,
             text_str,
@@ -512,6 +523,13 @@ with gr.Blocks(
         (https://github.com/siddhu001/espnet/tree/sds_demo_recipe/egs2/TEMPLATE/sds1#how-to-use).
         """
     )
+    default_instruct = (
+        "You are a helpful and friendly AI "
+        "assistant. "
+        "You are polite, respectful, and aim to "
+        "provide concise and complete responses of "
+        "less than 15 words."
+    )
     with gr.Row():
         with gr.Column(scale=1):
             user_audio = gr.Audio(
@@ -519,6 +537,12 @@ with gr.Blocks(
                 streaming=True,
                 waveform_options=gr.WaveformOptions(sample_rate=16000),
             )
+            input_text = gr.Textbox(
+                label="LLM prompt",
+                visible=True,
+                interactive=True,
+                value=default_instruct,
+            )
             with gr.Row():
                 type_radio = gr.Radio(
                     choices=["Cascaded", "E2E"],
@@ -686,7 +710,7 @@ with gr.Blocks(
     )
     user_audio.stream(
         transcribe,
-        inputs=[state, user_audio, radio, ASR_radio, LLM_radio, type_radio],
+        inputs=[state, user_audio, radio, ASR_radio, LLM_radio, type_radio, input_text],
         outputs=[state, output_asr_text, output_text, output_audio, output_audio1],
     ).then(
         lambda *args: callback.flag(list(args)), [user_audio], None, preprocess=False
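
For context, the sketch below shows the general Gradio pattern this commit relies on: a gr.Textbox whose current value is passed as an extra input to a streaming audio callback. It is a minimal stand-in, not this Space's app.py; the transcribe body here is a placeholder, whereas the real app forwards input_text to dialogue_model.chat.init_chat(...) as the system prompt (see the hunk at line 422 above).

    import gradio as gr

    default_instruct = (
        "You are a helpful and friendly AI assistant. "
        "You are polite, respectful, and aim to provide concise and complete "
        "responses of less than 15 words."
    )


    def transcribe(state, new_chunk, input_text):
        # new_chunk is a (sample_rate, numpy array) tuple; input_text carries the
        # current textbox contents with every streamed chunk, so the prompt can be
        # (re)applied before the dialogue model produces its next response.
        sr, y = new_chunk
        state = (state or 0) + len(y)  # stand-in for the app's audio buffering
        return state, f"prompt = {input_text!r}; samples received = {state}"


    with gr.Blocks() as demo:
        state = gr.State()
        user_audio = gr.Audio(sources=["microphone"], streaming=True)
        input_text = gr.Textbox(
            label="LLM prompt",
            interactive=True,
            value=default_instruct,
        )
        output_text = gr.Textbox(label="Output")
        user_audio.stream(
            transcribe,
            inputs=[state, user_audio, input_text],
            outputs=[state, output_text],
        )

    if __name__ == "__main__":
        demo.launch()

Because input_text is listed in inputs=[...], an edited prompt takes effect on the next streamed chunk; the trade-off, as the diff shows, is that init_chat runs inside the streaming path rather than once at startup.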