csukuangfj committed on
Commit
ff7ef47
·
1 Parent(s): b72035a

support microphone and url

Browse files
Files changed (1) hide show
  1. app.py +138 -17
app.py CHANGED
@@ -22,7 +22,9 @@
22
  import logging
23
  import os
24
  import shutil
 
25
  import time
 
26
  import uuid
27
  from datetime import datetime
28
 
@@ -94,28 +96,38 @@ def process_uploaded_file(
94
  "result_item_error",
95
  )
96
 
 
97
  try:
98
- input_num_speakers = int(input_num_speakers)
99
- except ValueError:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  return "", build_html_output(
101
- "Please set a valid number of speakers",
 
 
102
  "result_item_error",
103
  )
104
 
105
- if input_num_speakers <= 0:
106
- try:
107
- input_threshold = float(input_threshold)
108
- if input_threshold < 0 or input_threshold > 10:
109
- raise ValueError("")
110
- except ValueError:
111
- return "", build_html_output(
112
- "Please set a valid threshold between (0, 10)",
113
- "result_item_error",
114
- )
115
- else:
116
- input_threshold = 0
117
-
118
- MyPrint(f"Processing uploaded file: {in_filename}")
119
  try:
120
  return process(
121
  in_filename=in_filename,
@@ -130,6 +142,32 @@ def process_uploaded_file(
130
  return "", build_html_output(str(e), "result_item_error")
131
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def process(
134
  embedding_framework: str,
135
  embedding_model: str,
@@ -145,6 +183,27 @@ def process(
145
  MyPrint(f"input_threshold: {input_threshold}")
146
  MyPrint(f"in_filename: {in_filename}")
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  filename = convert_to_wav(in_filename)
149
 
150
  now = datetime.now()
@@ -313,6 +372,42 @@ with demo:
313
  outputs=[uploaded_output, uploaded_html_info],
314
  fn=process_uploaded_file,
315
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
  upload_button.click(
318
  process_uploaded_file,
@@ -327,6 +422,32 @@ with demo:
327
  outputs=[uploaded_output, uploaded_html_info],
328
  )
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  gr.Markdown(description)
331
 
332
  if __name__ == "__main__":
 
22
  import logging
23
  import os
24
  import shutil
25
+ import tempfile
26
  import time
27
+ import urllib.request
28
  import uuid
29
  from datetime import datetime
30
 
 
96
  "result_item_error",
97
  )
98
 
99
+ MyPrint(f"Processing uploaded file: {in_filename}")
100
  try:
101
+ return process(
102
+ in_filename=in_filename,
103
+ embedding_framework=embedding_framework,
104
+ embedding_model=embedding_model,
105
+ speaker_segmentation_model=speaker_segmentation_model,
106
+ input_num_speakers=input_num_speakers,
107
+ input_threshold=input_threshold,
108
+ )
109
+ except Exception as e:
110
+ MyPrint(str(e))
111
+ return "", build_html_output(str(e), "result_item_error")
112
+
113
+
114
+ def process_microphone(
115
+ embedding_framework: str,
116
+ embedding_model: str,
117
+ speaker_segmentation_model: str,
118
+ input_num_speakers: str,
119
+ input_threshold: str,
120
+ in_filename: str,
121
+ ):
122
+ if in_filename is None or in_filename == "":
123
  return "", build_html_output(
124
+ "Please first click 'Record from microphone', speak, "
125
+ "click 'Stop recording', and then "
126
+ "click the button 'submit for speaker diarization'",
127
  "result_item_error",
128
  )
129
 
130
+ MyPrint(f"Processing microphone: {in_filename}")
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  try:
132
  return process(
133
  in_filename=in_filename,
 
142
  return "", build_html_output(str(e), "result_item_error")
143
 
144
 
145
def process_url(
    embedding_framework: str,
    embedding_model: str,
    speaker_segmentation_model: str,
    input_num_speakers: str,
    input_threshold: str,
    url: str,
):
    """Download the audio file at ``url`` and run speaker diarization on it.

    The URL comes straight from a user-editable textbox, so it is validated
    before anything is fetched: it must be non-empty and use the ``http`` or
    ``https`` scheme.  ``urllib.request.urlretrieve`` would otherwise also
    accept schemes such as ``file:``, letting a caller point the server at
    its own local files.

    Args:
      embedding_framework: Forwarded unchanged to ``process``.
      embedding_model: Forwarded unchanged to ``process``.
      speaker_segmentation_model: Forwarded unchanged to ``process``.
      input_num_speakers: Forwarded unchanged to ``process``.
      input_threshold: Forwarded unchanged to ``process``.
      url: Location of the audio file to download.

    Returns:
      Whatever ``process`` returns on success.  On invalid input or on any
      failure, a tuple of an empty string and an HTML error snippet built by
      ``build_html_output``.
    """
    # Local import keeps this block self-contained; urllib.parse is stdlib.
    from urllib.parse import urlparse

    MyPrint(f"Processing URL: {url}")

    url = "" if url is None else url.strip()
    if not url:
        return "", build_html_output(
            "Please input a URL to an audio file and then "
            "click the button 'submit for speaker diarization'",
            "result_item_error",
        )

    # Only allow web downloads; reject file:, ftp:, data:, etc.
    if urlparse(url).scheme.lower() not in ("http", "https"):
        return "", build_html_output(
            "Please input a valid URL starting with http:// or https://",
            "result_item_error",
        )

    # The temporary file is removed automatically when the with-block exits,
    # so the download must be fully processed before leaving it.
    with tempfile.NamedTemporaryFile() as f:
        try:
            urllib.request.urlretrieve(url, f.name)

            return process(
                in_filename=f.name,
                embedding_framework=embedding_framework,
                embedding_model=embedding_model,
                speaker_segmentation_model=speaker_segmentation_model,
                input_num_speakers=input_num_speakers,
                input_threshold=input_threshold,
            )
        except Exception as e:
            # Top-level UI boundary: report the failure to the user instead
            # of letting the exception escape into the web framework.
            MyPrint(str(e))
            return "", build_html_output(str(e), "result_item_error")
169
+
170
+
171
  def process(
172
  embedding_framework: str,
173
  embedding_model: str,
 
183
  MyPrint(f"input_threshold: {input_threshold}")
184
  MyPrint(f"in_filename: {in_filename}")
185
 
186
+ try:
187
+ input_num_speakers = int(input_num_speakers)
188
+ except ValueError:
189
+ return "", build_html_output(
190
+ "Please set a valid number of speakers",
191
+ "result_item_error",
192
+ )
193
+
194
+ if input_num_speakers <= 0:
195
+ try:
196
+ input_threshold = float(input_threshold)
197
+ if input_threshold < 0 or input_threshold > 10:
198
+ raise ValueError("")
199
+ except ValueError:
200
+ return "", build_html_output(
201
+ "Please set a valid threshold between (0, 10)",
202
+ "result_item_error",
203
+ )
204
+ else:
205
+ input_threshold = 0
206
+
207
  filename = convert_to_wav(in_filename)
208
 
209
  now = datetime.now()
 
372
  outputs=[uploaded_output, uploaded_html_info],
373
  fn=process_uploaded_file,
374
  )
375
+ with gr.TabItem("Record from microphone"):
376
+ microphone = gr.Audio(
377
+ sources=["microphone"], # Choose between "microphone", "upload"
378
+ type="filepath",
379
+ label="Record from microphone",
380
+ )
381
+
382
+ record_button = gr.Button("Submit for speaker diarization")
383
+ recorded_output = gr.Textbox(label="Result from recordings")
384
+ recorded_html_info = gr.HTML(label="Info")
385
+
386
+ gr.Examples(
387
+ examples=examples,
388
+ inputs=[
389
+ embedding_framework_radio,
390
+ embedding_model_dropdown,
391
+ speaker_segmentation_model_dropdown,
392
+ input_num_speakers,
393
+ input_threshold,
394
+ microphone,
395
+ ],
396
+ outputs=[recorded_output, recorded_html_info],
397
+ fn=process_microphone,
398
+ )
399
+
400
+ with gr.TabItem("From URL"):
401
+ url_textbox = gr.Textbox(
402
+ max_lines=1,
403
+ placeholder="URL to an audio file",
404
+ label="URL",
405
+ interactive=True,
406
+ )
407
+
408
+ url_button = gr.Button("Submit for speaker diarization")
409
+ url_output = gr.Textbox(label="Result from URL")
410
+ url_html_info = gr.HTML(label="Info")
411
 
412
  upload_button.click(
413
  process_uploaded_file,
 
422
  outputs=[uploaded_output, uploaded_html_info],
423
  )
424
 
425
+ record_button.click(
426
+ process_microphone,
427
+ inputs=[
428
+ embedding_framework_radio,
429
+ embedding_model_dropdown,
430
+ speaker_segmentation_model_dropdown,
431
+ input_num_speakers,
432
+ input_threshold,
433
+ microphone,
434
+ ],
435
+ outputs=[recorded_output, recorded_html_info],
436
+ )
437
+
438
+ url_button.click(
439
+ process_url,
440
+ inputs=[
441
+ embedding_framework_radio,
442
+ embedding_model_dropdown,
443
+ speaker_segmentation_model_dropdown,
444
+ input_num_speakers,
445
+ input_threshold,
446
+ url_textbox,
447
+ ],
448
+ outputs=[url_output, url_html_info],
449
+ )
450
+
451
  gr.Markdown(description)
452
 
453
  if __name__ == "__main__":