# clone dataset git clone https://huggingface.co/datasets/kotoba-tech/kotoba-whisper-eval # convert to 16khz ffmpeg -i kotoba-whisper-eval/audio/long_interview_1.mp3 -ar 16000 -ac 1 -c:a pcm_s16le kotoba-whisper-eval/audio/long_interview_1.wav ffmpeg -i kotoba-whisper-eval/audio/manzai1.mp3 -ar 16000 -ac 1 -c:a pcm_s16le kotoba-whisper-eval/audio/manzai1.wav ffmpeg -i kotoba-whisper-eval/audio/manzai2.mp3 -ar 16000 -ac 1 -c:a pcm_s16le kotoba-whisper-eval/audio/manzai2.wav ffmpeg -i kotoba-whisper-eval/audio/manzai3.mp3 -ar 16000 -ac 1 -c:a pcm_s16le kotoba-whisper-eval/audio/manzai3.wav # run the benchmark wget https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0/raw/main/benchmark.py python benchmark.py