Update README.md
README.md CHANGED
@@ -16,7 +16,7 @@ license: cc-by-4.0
 
 Our demo is available [here](https://huggingface.co/spaces/pyf98/OWSM_v3_demo).
 
-**OWSM v3.1 is an improved version of OWSM v3. It significantly outperforms OWSM v3 in almost all evaluation benchmarks.**
+**[OWSM v3.1](https://arxiv.org/abs/2401.16658) is an improved version of OWSM v3. It significantly outperforms OWSM v3 in almost all evaluation benchmarks.**
 We do not include any new training data. Instead, we utilize a state-of-the-art speech encoder, [E-Branchformer](https://arxiv.org/abs/2210.00077).
 
 OWSM v3.1 has 1.02B parameters in total and is trained on 180k hours of public speech data.
@@ -31,11 +31,20 @@ Specifically, it supports the following speech-to-text tasks:
 ### Citing OWSM, Branchformers and ESPnet
 
 ```BibTex
-@
-
-
-
-
+@misc{peng2024owsm,
+      title={OWSM v3.1: Better and Faster Open Whisper-Style Speech Models based on E-Branchformer},
+      author={Yifan Peng and Jinchuan Tian and William Chen and Siddhant Arora and Brian Yan and Yui Sudo and Muhammad Shakeel and Kwanghee Choi and Jiatong Shi and Xuankai Chang and Jee-weon Jung and Shinji Watanabe},
+      year={2024},
+      eprint={2401.16658},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+@INPROCEEDINGS{owsm-asru23,
+  author={Peng, Yifan and Tian, Jinchuan and Yan, Brian and Berrebbi, Dan and Chang, Xuankai and Li, Xinjian and Shi, Jiatong and Arora, Siddhant and Chen, William and Sharma, Roshan and Zhang, Wangyou and Sudo, Yui and Shakeel, Muhammad and Jung, Jee-Weon and Maiti, Soumi and Watanabe, Shinji},
+  booktitle={2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
+  title={Reproducing Whisper-Style Training Using An Open-Source Toolkit And Publicly Available Data},
+  year={2023},
+  doi={10.1109/ASRU57964.2023.10389676}
 }
 @inproceedings{peng23b_interspeech,
 author={Yifan Peng and Kwangyoun Kim and Felix Wu and Brian Yan and Siddhant Arora and William Chen and Jiyang Tang and Suwon Shon and Prashant Sridhar and Shinji Watanabe},