VanYsa committed on
Commit
548c4d8
·
1 Parent(s): 474452f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -15
app.py CHANGED
@@ -21,7 +21,7 @@ from transformers import pipeline
21
  # Set an environment variable
22
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
23
 
24
-
25
  SAMPLE_RATE = 16000 # Hz
26
  MAX_AUDIO_SECONDS = 40 # wont try to transcribe if longer than this
27
  DESCRIPTION = '''
@@ -35,16 +35,10 @@ DESCRIPTION = '''
35
  '''
36
  PLACEHOLDER = """
37
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
38
- <img src="./MyAlexaLogo.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
39
  <p style="font-size: 28px; margin-bottom: 2px; opacity: 0.65;">What's on your mind?</p>
40
  </div>
41
  """
42
- # PLACEHOLDER = """
43
- # <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
44
- # <img src="https://i.ibb.co/S35q17Q/My-Alexa-Logo.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
45
- # <p style="font-size: 28px; margin-bottom: 2px; opacity: 0.65;">What's on your mind?</p>
46
- # </div>
47
- # """
48
 
49
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
 
@@ -73,6 +67,8 @@ terminators = [
73
  ### TTS model
74
  pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
75
 
 
 
76
  def convert_audio(audio_filepath, tmpdir, utt_id):
77
  """
78
  Convert all files to monochannel 16 kHz wav files.
@@ -197,10 +193,15 @@ def voice_player(history):
197
  Plays the generated response using the VITS-ljs model.
198
  Returns the audio player with the generated response.
199
  """
200
- _, text = history
201
  voice = pipe(text)
202
- voice = gr.Audio(value = (voice["sampling_rate"], voice["audio"].squeeze()), type="numpy", autoplay=True, label="MyAlexa Response", show_label=True,
203
- visible=True)
 
 
 
 
 
204
  return voice
205
 
206
 
@@ -226,7 +227,10 @@ with gr.Blocks(
226
  "<p><b>Step 1:</b> Upload an audio file or record with your microphone.</p>"
227
  )
228
 
229
- audio_file = gr.Audio(sources=["microphone", "upload"], type="filepath")
 
 
 
230
 
231
 
232
  with gr.Column():
@@ -238,7 +242,7 @@ with gr.Blocks(
238
  variant="primary"
239
  )
240
 
241
- chat_input = gr.Textbox(
242
  label="Transcribed text:",
243
  interactive=False,
244
  placeholder="Transcribed text will appear here.",
@@ -246,12 +250,12 @@ with gr.Blocks(
246
  visible=True # set to True to see processing time of asr transcription
247
  )
248
 
249
- out_audio = gr.Audio(
250
  value = None,
251
  label="Response Voice Player",
252
  show_label=True,
253
  visible=True # set to True to see processing time of tts audio generation
254
- )
255
 
256
  chat_msg = chat_input.change(add_message, [chatbot, chat_input], [chatbot], api_name="add_message_in_chatbot")
257
  bot_msg = chat_msg.then(bot, [chatbot, chat_input], chatbot, api_name="bot_response_in_chatbot")
 
21
  # Set an environment variable
22
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
23
 
24
+ # Variables
25
  SAMPLE_RATE = 16000 # Hz
26
  MAX_AUDIO_SECONDS = 40 # wont try to transcribe if longer than this
27
  DESCRIPTION = '''
 
35
  '''
36
  PLACEHOLDER = """
37
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
38
+ <img src="https://i.ibb.co/S35q17Q/My-Alexa-Logo.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
39
  <p style="font-size: 28px; margin-bottom: 2px; opacity: 0.65;">What's on your mind?</p>
40
  </div>
41
  """
 
 
 
 
 
 
42
 
43
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
44
 
 
67
  ### TTS model
68
  pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
69
 
70
+
71
+
72
  def convert_audio(audio_filepath, tmpdir, utt_id):
73
  """
74
  Convert all files to monochannel 16 kHz wav files.
 
193
  Plays the generated response using the VITS-ljs model.
194
  Returns the audio player with the generated response.
195
  """
196
+ _, text = history[-1]
197
  voice = pipe(text)
198
+ voice = gr.Audio(value = (
199
+ voice["sampling_rate"],
200
+ voice["audio"].squeeze()),
201
+ type="numpy", autoplay=True,
202
+ label="MyAlexa Response",
203
+ show_label=True,
204
+ visible=True)
205
  return voice
206
 
207
 
 
227
  "<p><b>Step 1:</b> Upload an audio file or record with your microphone.</p>"
228
  )
229
 
230
+ audio_file = gr.Audio(
231
+ sources=["microphone", "upload"],
232
+ type="filepath"
233
+ )
234
 
235
 
236
  with gr.Column():
 
242
  variant="primary"
243
  )
244
 
245
+ chat_input = gr.Textbox( # Shows the transcribed text
246
  label="Transcribed text:",
247
  interactive=False,
248
  placeholder="Transcribed text will appear here.",
 
250
  visible=True # set to True to see processing time of asr transcription
251
  )
252
 
253
+ out_audio = gr.Audio( # Shows an audio player for the generated response
254
  value = None,
255
  label="Response Voice Player",
256
  show_label=True,
257
  visible=True # set to True to see processing time of tts audio generation
258
+ )
259
 
260
  chat_msg = chat_input.change(add_message, [chatbot, chat_input], [chatbot], api_name="add_message_in_chatbot")
261
  bot_msg = chat_msg.then(bot, [chatbot, chat_input], chatbot, api_name="bot_response_in_chatbot")