ftshijt committed on
Commit
a9a0fbe
·
1 Parent(s): 1c976eb

Update model

Browse files
Files changed (43) hide show
  1. README.md +794 -3
  2. data/token_list/bpe_unigram500/bpe.model +3 -0
  3. dump_ark/raw/train_update/metric2id +13 -0
  4. meta.yaml +8 -0
  5. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/7epoch.pth +3 -0
  6. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/config.yaml +719 -0
  7. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/backward_time.png +0 -0
  8. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/clip.png +0 -0
  9. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/dns_p808_l1.png +0 -0
  10. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/dns_p808_overall.png +0 -0
  11. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0corr_l1.png +0 -0
  12. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0corr_overall.png +0 -0
  13. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0rmse_l1.png +0 -0
  14. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0rmse_overall.png +0 -0
  15. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/forward_time.png +0 -0
  16. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/gpu_max_cached_mem_GB.png +0 -0
  17. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/grad_norm.png +0 -0
  18. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/iter_time.png +0 -0
  19. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/loss.png +0 -0
  20. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/loss_scale.png +0 -0
  21. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mcd_l1.png +0 -0
  22. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mcd_overall.png +0 -0
  23. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mos_l1.png +0 -0
  24. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mos_overall.png +0 -0
  25. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/optim0_lr0.png +0 -0
  26. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/optim_step_time.png +0 -0
  27. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/pesq_l1.png +0 -0
  28. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/pesq_overall.png +0 -0
  29. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/sheet_ssqa_l1.png +0 -0
  30. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/sheet_ssqa_overall.png +0 -0
  31. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/si_snr_l1.png +0 -0
  32. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/si_snr_overall.png +0 -0
  33. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/speech_bert_l1.png +0 -0
  34. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/speech_bert_overall.png +0 -0
  35. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/spk_similarity_l1.png +0 -0
  36. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/spk_similarity_overall.png +0 -0
  37. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/stoi_l1.png +0 -0
  38. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/stoi_overall.png +0 -0
  39. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/train_time.png +0 -0
  40. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/utmos_l1.png +0 -0
  41. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/utmos_overall.png +0 -0
  42. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/wer_l1.png +0 -0
  43. update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/wer_overall.png +0 -0
README.md CHANGED
@@ -1,3 +1,794 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - universa
6
+ language: multilingual
7
+ datasets:
8
+ - urgent24
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 universa model
13
+
14
+ ### `espnet/universa-wavlm_base_urgent24_multi-metric_fullref`
15
+
16
+ This model was trained by ftshijt using the urgent24 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 5dbbd4cec6d4ccdd840484207951770027a8d4b8
26
+ pip install -e .
27
+ cd egs2/urgent24/uni_versa1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/universa-wavlm_base_urgent24_multi-metric_fullref
29
+ ```
30
+
31
+
32
+
33
+ ## universa config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: conf/train_universa_wavlm_freeze.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ drop_last_iter: false
42
+ dry_run: false
43
+ iterator_type: sequence
44
+ valid_iterator_type: null
45
+ output_dir: update_exp/universa_train_universa_wavlm_freeze_raw_fs16000
46
+ ngpu: 1
47
+ seed: 777
48
+ num_workers: 1
49
+ num_att_plot: 0
50
+ dist_backend: nccl
51
+ dist_init_method: env://
52
+ dist_world_size: null
53
+ dist_rank: null
54
+ local_rank: 0
55
+ dist_master_addr: null
56
+ dist_master_port: null
57
+ dist_launcher: null
58
+ multiprocessing_distributed: false
59
+ unused_parameters: false
60
+ sharded_ddp: false
61
+ use_deepspeed: false
62
+ deepspeed_config: null
63
+ cudnn_enabled: true
64
+ cudnn_benchmark: false
65
+ cudnn_deterministic: false
66
+ use_tf32: false
67
+ collect_stats: false
68
+ write_collected_feats: false
69
+ max_epoch: 100
70
+ patience: null
71
+ val_scheduler_criterion:
72
+ - valid
73
+ - loss
74
+ early_stopping_criterion:
75
+ - valid
76
+ - loss
77
+ - min
78
+ best_model_criterion:
79
+ - - train
80
+ - loss
81
+ - min
82
+ - - valid
83
+ - loss
84
+ - min
85
+ - - train
86
+ - acc
87
+ - max
88
+ - - valid
89
+ - acc
90
+ - max
91
+ keep_nbest_models: 5
92
+ nbest_averaging_interval: 0
93
+ grad_clip: -1
94
+ grad_clip_type: 2.0
95
+ grad_noise: false
96
+ accum_grad: 1
97
+ no_forward_run: false
98
+ resume: true
99
+ train_dtype: float32
100
+ use_amp: false
101
+ log_interval: 50
102
+ use_matplotlib: true
103
+ use_tensorboard: true
104
+ create_graph_in_tensorboard: false
105
+ use_wandb: false
106
+ wandb_project: null
107
+ wandb_id: null
108
+ wandb_entity: null
109
+ wandb_name: null
110
+ wandb_model_log_interval: -1
111
+ detect_anomaly: false
112
+ use_adapter: false
113
+ adapter: lora
114
+ save_strategy: all
115
+ adapter_conf: {}
116
+ pretrain_path: null
117
+ init_param: []
118
+ ignore_init_mismatch: false
119
+ freeze_param:
120
+ - frontend.upstream
121
+ num_iters_per_epoch: null
122
+ batch_size: 16
123
+ valid_batch_size: null
124
+ batch_bins: 1000000
125
+ valid_batch_bins: null
126
+ category_sample_size: 10
127
+ train_shape_file:
128
+ - update_exp/universa_stats_raw/train/audio_shape
129
+ - update_exp/universa_stats_raw/train/ref_audio_shape
130
+ - update_exp/universa_stats_raw/train/ref_text_shape
131
+ valid_shape_file:
132
+ - update_exp/universa_stats_raw/valid/audio_shape
133
+ - update_exp/universa_stats_raw/valid/ref_audio_shape
134
+ - update_exp/universa_stats_raw/valid/ref_text_shape
135
+ batch_type: sorted
136
+ valid_batch_type: null
137
+ fold_length:
138
+ - 256000
139
+ sort_in_batch: descending
140
+ shuffle_within_batch: false
141
+ sort_batch: descending
142
+ multiple_iterator: false
143
+ chunk_length: 500
144
+ chunk_shift_ratio: 0.5
145
+ num_cache_chunks: 1024
146
+ chunk_excluded_key_prefixes: []
147
+ chunk_default_fs: null
148
+ chunk_max_abs_length: null
149
+ chunk_discard_short_samples: true
150
+ train_data_path_and_name_and_type:
151
+ - - dump_ark/raw/train_update/wav.scp
152
+ - audio
153
+ - kaldi_ark
154
+ - - dump_ark/raw/train_update/metric.scp
155
+ - metrics
156
+ - metric
157
+ - - dump_ark/raw/train_update/ref_wav.scp
158
+ - ref_audio
159
+ - kaldi_ark
160
+ - - dump_ark/raw/train_update/text
161
+ - ref_text
162
+ - text
163
+ valid_data_path_and_name_and_type:
164
+ - - dump_ark/raw/dev_update/wav.scp
165
+ - audio
166
+ - kaldi_ark
167
+ - - dump_ark/raw/dev_update/metric.scp
168
+ - metrics
169
+ - metric
170
+ - - dump_ark/raw/dev_update/ref_wav.scp
171
+ - ref_audio
172
+ - kaldi_ark
173
+ - - dump_ark/raw/dev_update/text
174
+ - ref_text
175
+ - text
176
+ multi_task_dataset: false
177
+ allow_variable_data_keys: false
178
+ max_cache_size: 0.0
179
+ max_cache_fd: 32
180
+ allow_multi_rates: false
181
+ valid_max_cache_size: null
182
+ exclude_weight_decay: false
183
+ exclude_weight_decay_conf: {}
184
+ optim: adamw
185
+ optim_conf:
186
+ lr: 0.001
187
+ scheduler: warmuplr
188
+ scheduler_conf:
189
+ warmup_steps: 25000
190
+ metric2id: dump_ark/raw/train_update/metric2id
191
+ metric2type: null
192
+ metric_pad_value: -100
193
+ token_list:
194
+ - <blank>
195
+ - <unk>
196
+ - s
197
+ - ▁
198
+ - t
199
+ - e
200
+ - ▁the
201
+ - i
202
+ - a
203
+ - o
204
+ - ▁a
205
+ - r
206
+ - ▁to
207
+ - d
208
+ - ▁and
209
+ - ''''
210
+ - m
211
+ - n
212
+ - ing
213
+ - u
214
+ - y
215
+ - p
216
+ - c
217
+ - ▁of
218
+ - l
219
+ - ed
220
+ - ▁I
221
+ - ▁in
222
+ - er
223
+ - re
224
+ - ▁it
225
+ - ▁you
226
+ - ar
227
+ - ▁f
228
+ - ▁is
229
+ - ▁that
230
+ - ','
231
+ - .
232
+ - in
233
+ - al
234
+ - g
235
+ - 'on'
236
+ - ▁b
237
+ - b
238
+ - or
239
+ - ▁c
240
+ - ▁s
241
+ - f
242
+ - h
243
+ - ▁we
244
+ - an
245
+ - en
246
+ - ▁for
247
+ - le
248
+ - ▁p
249
+ - ly
250
+ - es
251
+ - w
252
+ - ▁re
253
+ - ▁on
254
+ - ▁m
255
+ - ▁be
256
+ - ic
257
+ - ll
258
+ - th
259
+ - ▁he
260
+ - k
261
+ - ur
262
+ - ve
263
+ - ▁with
264
+ - ▁so
265
+ - ▁from
266
+ - ▁was
267
+ - v
268
+ - ch
269
+ - st
270
+ - ▁w
271
+ - ▁i
272
+ - ▁this
273
+ - ▁de
274
+ - ▁like
275
+ - ▁do
276
+ - ce
277
+ - at
278
+ - il
279
+ - ck
280
+ - ▁A
281
+ - ▁have
282
+ - ▁not
283
+ - ad
284
+ - ▁st
285
+ - ow
286
+ - ro
287
+ - ne
288
+ - ▁me
289
+ - ▁my
290
+ - ▁but
291
+ - ation
292
+ - ▁at
293
+ - ▁or
294
+ - '-'
295
+ - ter
296
+ - ent
297
+ - ▁B
298
+ - ▁n
299
+ - ▁know
300
+ - ▁t
301
+ - out
302
+ - ▁are
303
+ - nd
304
+ - ▁one
305
+ - ▁li
306
+ - ▁g
307
+ - ▁The
308
+ - ol
309
+ - ion
310
+ - te
311
+ - ▁go
312
+ - ut
313
+ - ▁as
314
+ - ▁just
315
+ - as
316
+ - ▁sh
317
+ - ▁they
318
+ - is
319
+ - ▁C
320
+ - et
321
+ - ▁h
322
+ - ▁an
323
+ - ▁there
324
+ - ▁up
325
+ - ▁S
326
+ - ▁M
327
+ - ▁she
328
+ - ▁by
329
+ - ▁su
330
+ - om
331
+ - ▁can
332
+ - us
333
+ - ▁your
334
+ - ng
335
+ - ▁con
336
+ - el
337
+ - ▁us
338
+ - ment
339
+ - z
340
+ - ▁see
341
+ - ▁ab
342
+ - ▁what
343
+ - ▁out
344
+ - ▁her
345
+ - me
346
+ - ate
347
+ - ▁all
348
+ - ▁th
349
+ - ▁if
350
+ - ▁right
351
+ - ▁his
352
+ - ▁ma
353
+ - ▁lo
354
+ - ▁which
355
+ - ide
356
+ - ▁P
357
+ - ▁more
358
+ - ▁then
359
+ - ul
360
+ - ast
361
+ - x
362
+ - ight
363
+ - ill
364
+ - ▁So
365
+ - ▁sp
366
+ - ▁going
367
+ - ▁some
368
+ - ure
369
+ - ▁their
370
+ - ig
371
+ - ▁no
372
+ - ▁ro
373
+ - ▁think
374
+ - ▁who
375
+ - ▁pro
376
+ - ver
377
+ - ive
378
+ - est
379
+ - ▁co
380
+ - ▁di
381
+ - '0'
382
+ - ist
383
+ - ▁k
384
+ - age
385
+ - ▁d
386
+ - ▁time
387
+ - ▁L
388
+ - ies
389
+ - ▁will
390
+ - ▁man
391
+ - ▁when
392
+ - ▁D
393
+ - les
394
+ - ▁F
395
+ - ▁want
396
+ - ff
397
+ - ity
398
+ - ▁un
399
+ - '?'
400
+ - ▁start
401
+ - ▁G
402
+ - ▁uh
403
+ - ▁get
404
+ - ok
405
+ - ▁take
406
+ - ▁po
407
+ - li
408
+ - ▁ho
409
+ - ▁way
410
+ - ▁don
411
+ - ▁yeah
412
+ - ▁really
413
+ - ▁say
414
+ - ▁look
415
+ - ▁good
416
+ - ▁ra
417
+ - ▁pr
418
+ - ▁had
419
+ - ttle
420
+ - ▁comp
421
+ - ort
422
+ - ish
423
+ - ▁ex
424
+ - ally
425
+ - ▁sa
426
+ - ▁how
427
+ - end
428
+ - ant
429
+ - ▁O
430
+ - ▁um
431
+ - way
432
+ - ance
433
+ - ▁other
434
+ - ▁two
435
+ - ine
436
+ - ever
437
+ - able
438
+ - ▁com
439
+ - other
440
+ - ▁first
441
+ - ▁back
442
+ - ▁al
443
+ - ers
444
+ - ions
445
+ - ▁now
446
+ - ▁off
447
+ - ning
448
+ - ▁down
449
+ - ▁has
450
+ - ▁than
451
+ - ▁car
452
+ - ▁Th
453
+ - very
454
+ - ice
455
+ - ▁dr
456
+ - ▁been
457
+ - ▁him
458
+ - ▁here
459
+ - ated
460
+ - '5'
461
+ - ▁hand
462
+ - ▁day
463
+ - ▁hear
464
+ - each
465
+ - ▁would
466
+ - ▁over
467
+ - ▁oh
468
+ - ▁cha
469
+ - ood
470
+ - ▁did
471
+ - ugh
472
+ - ▁per
473
+ - ▁let
474
+ - ▁str
475
+ - ▁tra
476
+ - ▁got
477
+ - ext
478
+ - '1'
479
+ - ▁We
480
+ - ▁Shields
481
+ - ▁come
482
+ - ▁should
483
+ - ▁could
484
+ - light
485
+ - '2'
486
+ - ▁people
487
+ - ▁again
488
+ - ▁year
489
+ - ▁app
490
+ - ▁into
491
+ - ▁any
492
+ - ▁N
493
+ - ▁mean
494
+ - ▁o
495
+ - ▁mus
496
+ - ▁lot
497
+ - ▁said
498
+ - ▁long
499
+ - ▁these
500
+ - ▁lea
501
+ - sh
502
+ - ▁vi
503
+ - ▁part
504
+ - ▁every
505
+ - ▁our
506
+ - ▁You
507
+ - ious
508
+ - ▁fight
509
+ - ▁Ch
510
+ - ark
511
+ - ▁may
512
+ - ▁Hammer
513
+ - ▁because
514
+ - ▁most
515
+ - ▁came
516
+ - ▁four
517
+ - ful
518
+ - ▁No
519
+ - ize
520
+ - ▁where
521
+ - ▁okay
522
+ - ▁much
523
+ - ▁ask
524
+ - ▁through
525
+ - ▁before
526
+ - ▁work
527
+ - ▁even
528
+ - ▁three
529
+ - mber
530
+ - ▁win
531
+ - ▁flight
532
+ - ake
533
+ - K
534
+ - ▁place
535
+ - ▁play
536
+ - ▁though
537
+ - ▁pound
538
+ - ▁bit
539
+ - land
540
+ - ▁va
541
+ - ▁talk
542
+ - ▁kind
543
+ - ▁Line
544
+ - ▁make
545
+ - hap
546
+ - ▁big
547
+ - ▁leav
548
+ - ▁something
549
+ - ▁game
550
+ - ▁under
551
+ - ▁feel
552
+ - self
553
+ - ▁give
554
+ - ▁includ
555
+ - U
556
+ - ▁twenty
557
+ - ▁guard
558
+ - ▁left
559
+ - ▁round
560
+ - ▁great
561
+ - body
562
+ - ▁gra
563
+ - ress
564
+ - lso
565
+ - '3'
566
+ - ▁everything
567
+ - ▁those
568
+ - ▁after
569
+ - ▁tell
570
+ - ▁need
571
+ - ▁yes
572
+ - qua
573
+ - ham
574
+ - ▁minutes
575
+ - ▁question
576
+ - ▁around
577
+ - ▁punch
578
+ - ▁course
579
+ - ▁gonna
580
+ - ▁person
581
+ - ▁move
582
+ - ▁plan
583
+ - ▁ear
584
+ - ept
585
+ - ▁Airport
586
+ - ▁Okay
587
+ - ▁found
588
+ - ▁seven
589
+ - ▁help
590
+ - que
591
+ - ▁qui
592
+ - ▁keep
593
+ - ▁guys
594
+ - ▁house
595
+ - ▁run
596
+ - ▁turn
597
+ - ▁better
598
+ - ▁stop
599
+ - ward
600
+ - ddle
601
+ - ▁second
602
+ - ground
603
+ - ▁world
604
+ - ▁high
605
+ - ▁point
606
+ - ▁hold
607
+ - ▁call
608
+ - '6'
609
+ - ▁actually
610
+ - ▁probably
611
+ - ▁heaven
612
+ - ▁speci
613
+ - ▁everyone
614
+ - ▁why
615
+ - ▁presen
616
+ - ▁thir
617
+ - lright
618
+ - ▁eye
619
+ - eath
620
+ - ▁Tak
621
+ - '!'
622
+ - '"'
623
+ - '4'
624
+ - ▁hundred
625
+ - ▁answer
626
+ - ▁small
627
+ - ▁wait
628
+ - ▁nothing
629
+ - q
630
+ - '8'
631
+ - V
632
+ - ▁countr
633
+ - ▁problem
634
+ - ▁continu
635
+ - ▁close
636
+ - ▁priva
637
+ - ▁20
638
+ - ▁pleas
639
+ - ▁walk
640
+ - ▁open
641
+ - ▁lay
642
+ - ▁Station
643
+ - ▁moment
644
+ - ▁Yeah
645
+ - ▁public
646
+ - possibl
647
+ - ▁happen
648
+ - together
649
+ - ▁while
650
+ - asically
651
+ - ▁money
652
+ - ▁wrong
653
+ - B
654
+ - ▁puzzle
655
+ - '7'
656
+ - ▁journ
657
+ - ▁rainbow
658
+ - ▁thousand
659
+ - I
660
+ - '9'
661
+ - S
662
+ - P
663
+ - '%'
664
+ - A
665
+ - D
666
+ - L
667
+ - F
668
+ - ’
669
+ - O
670
+ - G
671
+ - N
672
+ - á
673
+ - C
674
+ - $
675
+ - Z
676
+ - Y
677
+ - R
678
+ - E
679
+ - J
680
+ - W
681
+ - M
682
+ - H
683
+ - j
684
+ - –
685
+ - ;
686
+ - Q
687
+ - X
688
+ - ']'
689
+ - −
690
+ - '&'
691
+ - T
692
+ - '['
693
+ - <sos/eos>
694
+ init: xavier_uniform
695
+ model_conf: {}
696
+ use_ref_audio: true
697
+ use_ref_text: true
698
+ use_preprocessor: true
699
+ token_type: bpe
700
+ bpemodel: data/token_list/bpe_unigram500/bpe.model
701
+ non_linguistic_symbols: null
702
+ cleaner: null
703
+ g2p: null
704
+ frontend: s3prl
705
+ frontend_conf:
706
+ frontend_conf:
707
+ upstream: wavlm_large
708
+ download_dir: ./hub
709
+ multilayer_feature: true
710
+ universa: base
711
+ universa_conf:
712
+ embedding_dim: 256
713
+ audio_encoder_type: transformer
714
+ audio_encoder_params:
715
+ num_blocks: 4
716
+ attention_heads: 4
717
+ linear_units: 1024
718
+ dropout_rate: 0.1
719
+ positional_dropout_rate: 0.1
720
+ attention_dropout_rate: 0.1
721
+ input_layer: conv2d
722
+ normalize_before: true
723
+ concat_after: false
724
+ positionwise_layer_type: linear
725
+ positionwise_conv_kernel_size: 1
726
+ layer_drop_rate: 0.1
727
+ qk_norm: false
728
+ use_flash_attn: false
729
+ text_encoder_type: transformer
730
+ text_encoder_params:
731
+ num_blocks: 4
732
+ attention_heads: 4
733
+ linear_units: 1024
734
+ dropout_rate: 0.1
735
+ positional_dropout_rate: 0.1
736
+ attention_dropout_rate: 0.1
737
+ input_layer: linear
738
+ normalize_before: true
739
+ concat_after: false
740
+ positionwise_layer_type: linear
741
+ positionwise_conv_kernel_size: 1
742
+ layer_drop_rate: 0.1
743
+ qk_norm: false
744
+ use_flash_attn: false
745
+ cross_attention_type: multihead
746
+ cross_attention_params:
747
+ n_head: 4
748
+ dropout_rate: 0.1
749
+ pooling_type: mean
750
+ projector_type: linear
751
+ multi_branch: true
752
+ required:
753
+ - output_dir
754
+ - metric2id
755
+ version: '202412'
756
+ distributed: false
757
+ ```
758
+
759
+ </details>
760
+
761
+
762
+
763
+ ### Citing ESPnet
764
+
765
+ ```bibtex
766
+ @inproceedings{watanabe2018espnet,
767
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
768
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
769
+ year={2018},
770
+ booktitle={Proceedings of Interspeech},
771
+ pages={2207--2211},
772
+ doi={10.21437/Interspeech.2018-1456},
773
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
774
+ }
775
+
776
+
777
+
778
+
779
+
780
+
781
+ ```
782
+
783
+ or arXiv:
784
+
785
+ ```bibtex
786
+ @misc{watanabe2018espnet,
787
+ title={ESPnet: End-to-End Speech Processing Toolkit},
788
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
789
+ year={2018},
790
+ eprint={1804.00015},
791
+ archivePrefix={arXiv},
792
+ primaryClass={cs.CL}
793
+ }
794
+ ```
data/token_list/bpe_unigram500/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9049e379dea3f04985ae169064ce46e7eae56316e5e38b9ae4476851054b062a
3
+ size 244630
dump_ark/raw/train_update/metric2id ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dns_p808
2
+ f0corr
3
+ f0rmse
4
+ mcd
5
+ mos
6
+ pesq
7
+ sheet_ssqa
8
+ si_snr
9
+ speech_bert
10
+ spk_similarity
11
+ stoi
12
+ utmos
13
+ wer
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202412'
2
+ files:
3
+ model_file: update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/7epoch.pth
4
+ python: "3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) \n[GCC 12.3.0]"
5
+ timestamp: 1740672071.913292
6
+ torch: 2.6.0.dev20241210+cu124
7
+ yaml_files:
8
+ train_config: update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/config.yaml
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/7epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:961c3ed509b4f26b38da5ef5c3d8f3f298644cb4041ab3b6ce67e50d39d0f268
3
+ size 1923127688
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/config.yaml ADDED
@@ -0,0 +1,719 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_universa_wavlm_freeze.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: update_exp/universa_train_universa_wavlm_freeze_raw_fs16000
9
+ ngpu: 1
10
+ seed: 777
11
+ num_workers: 1
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ cudnn_enabled: true
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: false
29
+ use_tf32: false
30
+ collect_stats: false
31
+ write_collected_feats: false
32
+ max_epoch: 100
33
+ patience: null
34
+ val_scheduler_criterion:
35
+ - valid
36
+ - loss
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ best_model_criterion:
42
+ - - train
43
+ - loss
44
+ - min
45
+ - - valid
46
+ - loss
47
+ - min
48
+ - - train
49
+ - acc
50
+ - max
51
+ - - valid
52
+ - acc
53
+ - max
54
+ keep_nbest_models: 5
55
+ nbest_averaging_interval: 0
56
+ grad_clip: -1
57
+ grad_clip_type: 2.0
58
+ grad_noise: false
59
+ accum_grad: 1
60
+ no_forward_run: false
61
+ resume: true
62
+ train_dtype: float32
63
+ use_amp: false
64
+ log_interval: 50
65
+ use_matplotlib: true
66
+ use_tensorboard: true
67
+ create_graph_in_tensorboard: false
68
+ use_wandb: false
69
+ wandb_project: null
70
+ wandb_id: null
71
+ wandb_entity: null
72
+ wandb_name: null
73
+ wandb_model_log_interval: -1
74
+ detect_anomaly: false
75
+ use_adapter: false
76
+ adapter: lora
77
+ save_strategy: all
78
+ adapter_conf: {}
79
+ pretrain_path: null
80
+ init_param: []
81
+ ignore_init_mismatch: false
82
+ freeze_param:
83
+ - frontend.upstream
84
+ num_iters_per_epoch: null
85
+ batch_size: 16
86
+ valid_batch_size: null
87
+ batch_bins: 1000000
88
+ valid_batch_bins: null
89
+ category_sample_size: 10
90
+ train_shape_file:
91
+ - update_exp/universa_stats_raw/train/audio_shape
92
+ - update_exp/universa_stats_raw/train/ref_audio_shape
93
+ - update_exp/universa_stats_raw/train/ref_text_shape
94
+ valid_shape_file:
95
+ - update_exp/universa_stats_raw/valid/audio_shape
96
+ - update_exp/universa_stats_raw/valid/ref_audio_shape
97
+ - update_exp/universa_stats_raw/valid/ref_text_shape
98
+ batch_type: sorted
99
+ valid_batch_type: null
100
+ fold_length:
101
+ - 256000
102
+ sort_in_batch: descending
103
+ shuffle_within_batch: false
104
+ sort_batch: descending
105
+ multiple_iterator: false
106
+ chunk_length: 500
107
+ chunk_shift_ratio: 0.5
108
+ num_cache_chunks: 1024
109
+ chunk_excluded_key_prefixes: []
110
+ chunk_default_fs: null
111
+ chunk_max_abs_length: null
112
+ chunk_discard_short_samples: true
113
+ train_data_path_and_name_and_type:
114
+ - - dump_ark/raw/train_update/wav.scp
115
+ - audio
116
+ - kaldi_ark
117
+ - - dump_ark/raw/train_update/metric.scp
118
+ - metrics
119
+ - metric
120
+ - - dump_ark/raw/train_update/ref_wav.scp
121
+ - ref_audio
122
+ - kaldi_ark
123
+ - - dump_ark/raw/train_update/text
124
+ - ref_text
125
+ - text
126
+ valid_data_path_and_name_and_type:
127
+ - - dump_ark/raw/dev_update/wav.scp
128
+ - audio
129
+ - kaldi_ark
130
+ - - dump_ark/raw/dev_update/metric.scp
131
+ - metrics
132
+ - metric
133
+ - - dump_ark/raw/dev_update/ref_wav.scp
134
+ - ref_audio
135
+ - kaldi_ark
136
+ - - dump_ark/raw/dev_update/text
137
+ - ref_text
138
+ - text
139
+ multi_task_dataset: false
140
+ allow_variable_data_keys: false
141
+ max_cache_size: 0.0
142
+ max_cache_fd: 32
143
+ allow_multi_rates: false
144
+ valid_max_cache_size: null
145
+ exclude_weight_decay: false
146
+ exclude_weight_decay_conf: {}
147
+ optim: adamw
148
+ optim_conf:
149
+ lr: 0.001
150
+ scheduler: warmuplr
151
+ scheduler_conf:
152
+ warmup_steps: 25000
153
+ metric2id: dump_ark/raw/train_update/metric2id
154
+ metric2type: null
155
+ metric_pad_value: -100
156
+ token_list:
157
+ - <blank>
158
+ - <unk>
159
+ - s
160
+ - ▁
161
+ - t
162
+ - e
163
+ - ▁the
164
+ - i
165
+ - a
166
+ - o
167
+ - ▁a
168
+ - r
169
+ - ▁to
170
+ - d
171
+ - ▁and
172
+ - ''''
173
+ - m
174
+ - n
175
+ - ing
176
+ - u
177
+ - y
178
+ - p
179
+ - c
180
+ - ▁of
181
+ - l
182
+ - ed
183
+ - ▁I
184
+ - ▁in
185
+ - er
186
+ - re
187
+ - ▁it
188
+ - ▁you
189
+ - ar
190
+ - ▁f
191
+ - ▁is
192
+ - ▁that
193
+ - ','
194
+ - .
195
+ - in
196
+ - al
197
+ - g
198
+ - 'on'
199
+ - ▁b
200
+ - b
201
+ - or
202
+ - ▁c
203
+ - ▁s
204
+ - f
205
+ - h
206
+ - ▁we
207
+ - an
208
+ - en
209
+ - ▁for
210
+ - le
211
+ - ▁p
212
+ - ly
213
+ - es
214
+ - w
215
+ - ▁re
216
+ - ▁on
217
+ - ▁m
218
+ - ▁be
219
+ - ic
220
+ - ll
221
+ - th
222
+ - ▁he
223
+ - k
224
+ - ur
225
+ - ve
226
+ - ▁with
227
+ - ▁so
228
+ - ▁from
229
+ - ▁was
230
+ - v
231
+ - ch
232
+ - st
233
+ - ▁w
234
+ - ▁i
235
+ - ▁this
236
+ - ▁de
237
+ - ▁like
238
+ - ▁do
239
+ - ce
240
+ - at
241
+ - il
242
+ - ck
243
+ - ▁A
244
+ - ▁have
245
+ - ▁not
246
+ - ad
247
+ - ▁st
248
+ - ow
249
+ - ro
250
+ - ne
251
+ - ▁me
252
+ - ▁my
253
+ - ▁but
254
+ - ation
255
+ - ▁at
256
+ - ▁or
257
+ - '-'
258
+ - ter
259
+ - ent
260
+ - ▁B
261
+ - ▁n
262
+ - ▁know
263
+ - ▁t
264
+ - out
265
+ - ▁are
266
+ - nd
267
+ - ▁one
268
+ - ▁li
269
+ - ▁g
270
+ - ▁The
271
+ - ol
272
+ - ion
273
+ - te
274
+ - ▁go
275
+ - ut
276
+ - ▁as
277
+ - ▁just
278
+ - as
279
+ - ▁sh
280
+ - ▁they
281
+ - is
282
+ - ▁C
283
+ - et
284
+ - ▁h
285
+ - ▁an
286
+ - ▁there
287
+ - ▁up
288
+ - ▁S
289
+ - ▁M
290
+ - ▁she
291
+ - ▁by
292
+ - ▁su
293
+ - om
294
+ - ▁can
295
+ - us
296
+ - ▁your
297
+ - ng
298
+ - ▁con
299
+ - el
300
+ - ▁us
301
+ - ment
302
+ - z
303
+ - ▁see
304
+ - ▁ab
305
+ - ▁what
306
+ - ▁out
307
+ - ▁her
308
+ - me
309
+ - ate
310
+ - ▁all
311
+ - ▁th
312
+ - ▁if
313
+ - ▁right
314
+ - ▁his
315
+ - ▁ma
316
+ - ▁lo
317
+ - ▁which
318
+ - ide
319
+ - ▁P
320
+ - ▁more
321
+ - ▁then
322
+ - ul
323
+ - ast
324
+ - x
325
+ - ight
326
+ - ill
327
+ - ▁So
328
+ - ▁sp
329
+ - ▁going
330
+ - ▁some
331
+ - ure
332
+ - ▁their
333
+ - ig
334
+ - ▁no
335
+ - ▁ro
336
+ - ▁think
337
+ - ▁who
338
+ - ▁pro
339
+ - ver
340
+ - ive
341
+ - est
342
+ - ▁co
343
+ - ▁di
344
+ - '0'
345
+ - ist
346
+ - ▁k
347
+ - age
348
+ - ▁d
349
+ - ▁time
350
+ - ▁L
351
+ - ies
352
+ - ▁will
353
+ - ▁man
354
+ - ▁when
355
+ - ▁D
356
+ - les
357
+ - ▁F
358
+ - ▁want
359
+ - ff
360
+ - ity
361
+ - ▁un
362
+ - '?'
363
+ - ▁start
364
+ - ▁G
365
+ - ▁uh
366
+ - ▁get
367
+ - ok
368
+ - ▁take
369
+ - ▁po
370
+ - li
371
+ - ▁ho
372
+ - ▁way
373
+ - ▁don
374
+ - ▁yeah
375
+ - ▁really
376
+ - ▁say
377
+ - ▁look
378
+ - ▁good
379
+ - ▁ra
380
+ - ▁pr
381
+ - ▁had
382
+ - ttle
383
+ - ▁comp
384
+ - ort
385
+ - ish
386
+ - ▁ex
387
+ - ally
388
+ - ▁sa
389
+ - ▁how
390
+ - end
391
+ - ant
392
+ - ▁O
393
+ - ▁um
394
+ - way
395
+ - ance
396
+ - ▁other
397
+ - ▁two
398
+ - ine
399
+ - ever
400
+ - able
401
+ - ▁com
402
+ - other
403
+ - ▁first
404
+ - ▁back
405
+ - ▁al
406
+ - ers
407
+ - ions
408
+ - ▁now
409
+ - ▁off
410
+ - ning
411
+ - ▁down
412
+ - ▁has
413
+ - ▁than
414
+ - ▁car
415
+ - ▁Th
416
+ - very
417
+ - ice
418
+ - ▁dr
419
+ - ▁been
420
+ - ▁him
421
+ - ▁here
422
+ - ated
423
+ - '5'
424
+ - ▁hand
425
+ - ▁day
426
+ - ▁hear
427
+ - each
428
+ - ▁would
429
+ - ▁over
430
+ - ▁oh
431
+ - ▁cha
432
+ - ood
433
+ - ▁did
434
+ - ugh
435
+ - ▁per
436
+ - ▁let
437
+ - ▁str
438
+ - ▁tra
439
+ - ▁got
440
+ - ext
441
+ - '1'
442
+ - ▁We
443
+ - ▁Shields
444
+ - ▁come
445
+ - ▁should
446
+ - ▁could
447
+ - light
448
+ - '2'
449
+ - ▁people
450
+ - ▁again
451
+ - ▁year
452
+ - ▁app
453
+ - ▁into
454
+ - ▁any
455
+ - ▁N
456
+ - ▁mean
457
+ - ▁o
458
+ - ▁mus
459
+ - ▁lot
460
+ - ▁said
461
+ - ▁long
462
+ - ▁these
463
+ - ▁lea
464
+ - sh
465
+ - ▁vi
466
+ - ▁part
467
+ - ▁every
468
+ - ▁our
469
+ - ▁You
470
+ - ious
471
+ - ▁fight
472
+ - ▁Ch
473
+ - ark
474
+ - ▁may
475
+ - ▁Hammer
476
+ - ▁because
477
+ - ▁most
478
+ - ▁came
479
+ - ▁four
480
+ - ful
481
+ - ▁No
482
+ - ize
483
+ - ▁where
484
+ - ▁okay
485
+ - ▁much
486
+ - ▁ask
487
+ - ▁through
488
+ - ▁before
489
+ - ▁work
490
+ - ▁even
491
+ - ▁three
492
+ - mber
493
+ - ▁win
494
+ - ▁flight
495
+ - ake
496
+ - K
497
+ - ▁place
498
+ - ▁play
499
+ - ▁though
500
+ - ▁pound
501
+ - ▁bit
502
+ - land
503
+ - ▁va
504
+ - ▁talk
505
+ - ▁kind
506
+ - ▁Line
507
+ - ▁make
508
+ - hap
509
+ - ▁big
510
+ - ▁leav
511
+ - ▁something
512
+ - ▁game
513
+ - ▁under
514
+ - ▁feel
515
+ - self
516
+ - ▁give
517
+ - ▁includ
518
+ - U
519
+ - ▁twenty
520
+ - ▁guard
521
+ - ▁left
522
+ - ▁round
523
+ - ▁great
524
+ - body
525
+ - ▁gra
526
+ - ress
527
+ - lso
528
+ - '3'
529
+ - ▁everything
530
+ - ▁those
531
+ - ▁after
532
+ - ▁tell
533
+ - ▁need
534
+ - ▁yes
535
+ - qua
536
+ - ham
537
+ - ▁minutes
538
+ - ▁question
539
+ - ▁around
540
+ - ▁punch
541
+ - ▁course
542
+ - ▁gonna
543
+ - ▁person
544
+ - ▁move
545
+ - ▁plan
546
+ - ▁ear
547
+ - ept
548
+ - ▁Airport
549
+ - ▁Okay
550
+ - ▁found
551
+ - ▁seven
552
+ - ▁help
553
+ - que
554
+ - ▁qui
555
+ - ▁keep
556
+ - ▁guys
557
+ - ▁house
558
+ - ▁run
559
+ - ▁turn
560
+ - ▁better
561
+ - ▁stop
562
+ - ward
563
+ - ddle
564
+ - ▁second
565
+ - ground
566
+ - ▁world
567
+ - ▁high
568
+ - ▁point
569
+ - ▁hold
570
+ - ▁call
571
+ - '6'
572
+ - ▁actually
573
+ - ▁probably
574
+ - ▁heaven
575
+ - ▁speci
576
+ - ▁everyone
577
+ - ▁why
578
+ - ▁presen
579
+ - ▁thir
580
+ - lright
581
+ - ▁eye
582
+ - eath
583
+ - ▁Tak
584
+ - '!'
585
+ - '"'
586
+ - '4'
587
+ - ▁hundred
588
+ - ▁answer
589
+ - ▁small
590
+ - ▁wait
591
+ - ▁nothing
592
+ - q
593
+ - '8'
594
+ - V
595
+ - ▁countr
596
+ - ▁problem
597
+ - ▁continu
598
+ - ▁close
599
+ - ▁priva
600
+ - ▁20
601
+ - ▁pleas
602
+ - ▁walk
603
+ - ▁open
604
+ - ▁lay
605
+ - ▁Station
606
+ - ▁moment
607
+ - ▁Yeah
608
+ - ▁public
609
+ - possibl
610
+ - ▁happen
611
+ - together
612
+ - ▁while
613
+ - asically
614
+ - ▁money
615
+ - ▁wrong
616
+ - B
617
+ - ▁puzzle
618
+ - '7'
619
+ - ▁journ
620
+ - ▁rainbow
621
+ - ▁thousand
622
+ - I
623
+ - '9'
624
+ - S
625
+ - P
626
+ - '%'
627
+ - A
628
+ - D
629
+ - L
630
+ - F
631
+ - ’
632
+ - O
633
+ - G
634
+ - N
635
+ - á
636
+ - C
637
+ - $
638
+ - Z
639
+ - Y
640
+ - R
641
+ - E
642
+ - J
643
+ - W
644
+ - M
645
+ - H
646
+ - j
647
+ - –
648
+ - ;
649
+ - Q
650
+ - X
651
+ - ']'
652
+ - −
653
+ - '&'
654
+ - T
655
+ - '['
656
+ - <sos/eos>
657
+ init: xavier_uniform
658
+ model_conf: {}
659
+ use_ref_audio: true
660
+ use_ref_text: true
661
+ use_preprocessor: true
662
+ token_type: bpe
663
+ bpemodel: data/token_list/bpe_unigram500/bpe.model
664
+ non_linguistic_symbols: null
665
+ cleaner: null
666
+ g2p: null
667
+ frontend: s3prl
668
+ frontend_conf:
669
+ frontend_conf:
670
+ upstream: wavlm_large
671
+ download_dir: ./hub
672
+ multilayer_feature: true
673
+ universa: base
674
+ universa_conf:
675
+ embedding_dim: 256
676
+ audio_encoder_type: transformer
677
+ audio_encoder_params:
678
+ num_blocks: 4
679
+ attention_heads: 4
680
+ linear_units: 1024
681
+ dropout_rate: 0.1
682
+ positional_dropout_rate: 0.1
683
+ attention_dropout_rate: 0.1
684
+ input_layer: conv2d
685
+ normalize_before: true
686
+ concat_after: false
687
+ positionwise_layer_type: linear
688
+ positionwise_conv_kernel_size: 1
689
+ layer_drop_rate: 0.1
690
+ qk_norm: false
691
+ use_flash_attn: false
692
+ text_encoder_type: transformer
693
+ text_encoder_params:
694
+ num_blocks: 4
695
+ attention_heads: 4
696
+ linear_units: 1024
697
+ dropout_rate: 0.1
698
+ positional_dropout_rate: 0.1
699
+ attention_dropout_rate: 0.1
700
+ input_layer: linear
701
+ normalize_before: true
702
+ concat_after: false
703
+ positionwise_layer_type: linear
704
+ positionwise_conv_kernel_size: 1
705
+ layer_drop_rate: 0.1
706
+ qk_norm: false
707
+ use_flash_attn: false
708
+ cross_attention_type: multihead
709
+ cross_attention_params:
710
+ n_head: 4
711
+ dropout_rate: 0.1
712
+ pooling_type: mean
713
+ projector_type: linear
714
+ multi_branch: true
715
+ required:
716
+ - output_dir
717
+ - metric2id
718
+ version: '202412'
719
+ distributed: false
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/backward_time.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/clip.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/dns_p808_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/dns_p808_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0corr_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0corr_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0rmse_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/f0rmse_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/forward_time.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/gpu_max_cached_mem_GB.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/grad_norm.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/iter_time.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/loss.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/loss_scale.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mcd_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mcd_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mos_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/mos_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/optim0_lr0.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/optim_step_time.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/pesq_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/pesq_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/sheet_ssqa_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/sheet_ssqa_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/si_snr_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/si_snr_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/speech_bert_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/speech_bert_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/spk_similarity_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/spk_similarity_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/stoi_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/stoi_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/train_time.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/utmos_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/utmos_overall.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/wer_l1.png ADDED
update_exp/universa_train_universa_wavlm_freeze_raw_fs16000/images/wer_overall.png ADDED