Shih-Lun Wu committed on
Commit
36fec8e
·
1 Parent(s): 288c802

add model files

Browse files
README.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - speech-recognition
6
+ - openai-whisper
7
+ language: en
8
+ datasets:
9
+ - librispeech
10
+ license: cc-by-4.0
11
+ ---
12
+
13
+ ## ESPnet2 ASR model
14
+
15
+ ### `espnet/shihlun_asr_whisper_medium_finetuned_librispeech100`
16
+ This model was trained by Shih-Lun Wu (slseanwu) using the librispeech_100 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+ ```bash
20
+ cd espnet
21
+ pip install -e .
22
+ cd egs2/librispeech_100/asr1
23
+
24
+ train_set="train_clean_100"
25
+ valid_set="dev"
26
+ test_sets="test_clean test_other dev_clean dev_other"
27
+ asr_tag=whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs
28
+ asr_config=conf/tuning/train_asr_whisper_full.yaml
29
+ inference_config=conf/decode_asr_whisper_noctc_greedy.yaml
30
+
31
+ ./asr.sh \
32
+ --skip_data_prep false \
33
+ --skip_train true \
34
+ --skip_eval false \
35
+ --lang en \
36
+ --ngpu 1 \
37
+ --nj 4 \
38
+ --stage 1 \
39
+ --stop_stage 13 \
40
+ --gpu_inference true \
41
+ --inference_nj 1 \
42
+ --token_type whisper_multilingual \
43
+ --feats_normalize '' \
44
+ --max_wav_duration 30 \
45
+ --speed_perturb_factors "0.9 1.0 1.1" \
46
+ --audio_format "flac.ark" \
47
+ --feats_type raw \
48
+ --use_lm false \
49
+ --cleaner whisper_en \
50
+ --asr_tag "${asr_tag}" \
51
+ --asr_config "${asr_config}" \
52
+ --inference_config "${inference_config}" \
53
+ --inference_asr_model valid.acc.ave.pth \
54
+ --train_set "${train_set}" \
55
+ --valid_set "${valid_set}" \
56
+ --test_sets "${test_sets}" "$@"
57
+ ```
58
+
59
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
60
+ # RESULTS
61
+ ## Environments
62
+ - date: `Mon Jan 9 23:06:34 CST 2023`
63
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
64
+ - espnet version: `espnet 202211`
65
+ - pytorch version: `pytorch 1.12.1`
66
+ - Git hash: `d89be931dcc8f61437ac49cbe39a773f2054c50c`
67
+ - Commit date: `Mon Jan 9 11:06:45 2023 -0600`
68
+
69
+ ## asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs
70
+ ### WER
71
+
72
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
73
+ |---|---|---|---|---|---|---|---|---|
74
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_clean|2703|54798|97.7|1.9|0.3|0.3|2.6|30.1|
75
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_other|2864|51528|95.3|4.3|0.4|0.6|5.3|45.4|
76
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_clean|2620|53027|97.6|2.1|0.3|0.4|2.7|30.9|
77
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_other|2939|52882|95.1|4.4|0.5|0.7|5.6|47.5|
78
+
79
+ ### CER
80
+
81
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
82
+ |---|---|---|---|---|---|---|---|---|
83
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_clean|2703|287287|99.3|0.3|0.4|0.3|1.0|30.1|
84
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_other|2864|265648|98.3|1.0|0.7|0.6|2.3|45.4|
85
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_clean|2620|280691|99.3|0.3|0.3|0.3|1.0|30.9|
86
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_other|2939|271738|98.3|1.0|0.7|0.7|2.4|47.5|
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/RESULTS.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Mon Jan 9 23:06:34 CST 2023`
5
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202211`
7
+ - pytorch version: `pytorch 1.12.1`
8
+ - Git hash: `d89be931dcc8f61437ac49cbe39a773f2054c50c`
9
+ - Commit date: `Mon Jan 9 11:06:45 2023 -0600`
10
+
11
+ ## asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_clean|2703|54798|97.7|1.9|0.3|0.3|2.6|30.1|
17
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_other|2864|51528|95.3|4.3|0.4|0.6|5.3|45.4|
18
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_clean|2620|53027|97.6|2.1|0.3|0.4|2.7|30.9|
19
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_other|2939|52882|95.1|4.4|0.5|0.7|5.6|47.5|
20
+
21
+ ### CER
22
+
23
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
24
+ |---|---|---|---|---|---|---|---|---|
25
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_clean|2703|287287|99.3|0.3|0.4|0.3|1.0|30.1|
26
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dev_other|2864|265648|98.3|1.0|0.7|0.6|2.3|45.4|
27
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_clean|2620|280691|99.3|0.3|0.3|0.3|1.0|30.9|
28
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/test_other|2939|271738|98.3|1.0|0.7|0.7|2.4|47.5|
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/config.yaml ADDED
The diff for this file is too large to render. See raw diff
 
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/acc.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/backward_time.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/cer.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/forward_time.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/iter_time.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/loss.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/loss_att.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/optim0_lr0.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/optim_step_time.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/train_time.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/images/wer.png ADDED
exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/valid.acc.ave.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c8c650ab68ef26d23a42a124ae7289a6cecf4ff3b73823a683fec7136b1efff
3
+ size 3055775259
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202211'
2
+ files:
3
+ asr_model_file: exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/valid.acc.ave.pth
4
+ python: "3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]"
5
+ timestamp: "Mon Jan 9 23:06:34 CST 2023"
6
+ torch: 1.12.1+cu117
7
+ yaml_files:
8
+ asr_train_config: exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/config.yaml