drmeeseeks
committed on
Commit
•
5804643
1
Parent(s):
46accd2
Upload whisper_python_am_et.ipynb
Browse files- whisper_python_am_et.ipynb +11 -10
whisper_python_am_et.ipynb
CHANGED
@@ -628,12 +628,13 @@
|
|
628 |
},
|
629 |
{
|
630 |
"cell_type": "code",
|
631 |
-
"execution_count":
|
632 |
"metadata": {
|
633 |
"id": "iN2MgL5gYgmB"
|
634 |
},
|
635 |
"outputs": [],
|
636 |
"source": [
|
|
|
637 |
"!echo 'python run_speech_recognition_seq2seq_streaming.py \\\n",
|
638 |
" --model_name_or_path=\"openai/whisper-small\" \\\n",
|
639 |
" --dataset_name=\"google/fleurs\" \\\n",
|
@@ -642,7 +643,7 @@
|
|
642 |
" --train_split_name=\"train+validation\" \\\n",
|
643 |
" --eval_split_name=\"test\" \\\n",
|
644 |
" --model_index_name=\"Whisper Small Amharic FLEURS\" \\\n",
|
645 |
-
" --max_steps=\"
|
646 |
" --output_dir=\"./whisper-small-amet\" \\\n",
|
647 |
" --per_device_train_batch_size=\"64\" \\\n",
|
648 |
" --per_device_eval_batch_size=\"32\" \\\n",
|
@@ -651,9 +652,9 @@
|
|
651 |
" --learning_rate=\"1e-5\" \\\n",
|
652 |
" --warmup_steps=\"500\" \\\n",
|
653 |
" --evaluation_strategy=\"steps\" \\\n",
|
654 |
-
" --eval_steps=\"
|
655 |
" --save_strategy=\"steps\" \\\n",
|
656 |
-
" --save_steps=\"
|
657 |
" --generation_max_length=\"225\" \\\n",
|
658 |
" --length_column_name=\"input_length\" \\\n",
|
659 |
" --max_duration_in_seconds=\"30\" \\\n",
|
@@ -662,22 +663,22 @@
|
|
662 |
" --report_to=\"tensorboard\" \\\n",
|
663 |
" --metric_for_best_model=\"wer\" \\\n",
|
664 |
" --greater_is_better=\"False\" \\\n",
|
665 |
-
" --load_best_model_at_end \\\n",
|
666 |
" --gradient_checkpointing \\\n",
|
667 |
" --fp16 \\\n",
|
668 |
" --overwrite_output_dir \\\n",
|
669 |
" --do_train \\\n",
|
670 |
-
" --do_eval
|
671 |
" --predict_with_generate \\\n",
|
672 |
-
" --do_normalize_eval \\\n",
|
673 |
" --use_auth_token \\\n",
|
674 |
" --no_streaming \\\n",
|
675 |
-
" --push_to_hub
|
676 |
]
|
677 |
},
|
678 |
{
|
679 |
"cell_type": "code",
|
680 |
-
"execution_count":
|
681 |
"metadata": {
|
682 |
"colab": {
|
683 |
"base_uri": "https://localhost:8080/"
|
@@ -709,7 +710,7 @@
|
|
709 |
"metadata": {},
|
710 |
"outputs": [],
|
711 |
"source": [
|
712 |
-
"!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --device=0 --language=\"am\""
|
713 |
]
|
714 |
},
|
715 |
{
|
|
|
628 |
},
|
629 |
{
|
630 |
"cell_type": "code",
|
631 |
+
"execution_count": 23,
|
632 |
"metadata": {
|
633 |
"id": "iN2MgL5gYgmB"
|
634 |
},
|
635 |
"outputs": [],
|
636 |
"source": [
|
637 |
+
"!rm run.sh\n",
|
638 |
"!echo 'python run_speech_recognition_seq2seq_streaming.py \\\n",
|
639 |
" --model_name_or_path=\"openai/whisper-small\" \\\n",
|
640 |
" --dataset_name=\"google/fleurs\" \\\n",
|
|
|
643 |
" --train_split_name=\"train+validation\" \\\n",
|
644 |
" --eval_split_name=\"test\" \\\n",
|
645 |
" --model_index_name=\"Whisper Small Amharic FLEURS\" \\\n",
|
646 |
+
" --max_steps=\"1000\" \\\n",
|
647 |
" --output_dir=\"./whisper-small-amet\" \\\n",
|
648 |
" --per_device_train_batch_size=\"64\" \\\n",
|
649 |
" --per_device_eval_batch_size=\"32\" \\\n",
|
|
|
652 |
" --learning_rate=\"1e-5\" \\\n",
|
653 |
" --warmup_steps=\"500\" \\\n",
|
654 |
" --evaluation_strategy=\"steps\" \\\n",
|
655 |
+
" --eval_steps=\"10000\" \\\n",
|
656 |
" --save_strategy=\"steps\" \\\n",
|
657 |
+
" --save_steps=\"100\" \\\n",
|
658 |
" --generation_max_length=\"225\" \\\n",
|
659 |
" --length_column_name=\"input_length\" \\\n",
|
660 |
" --max_duration_in_seconds=\"30\" \\\n",
|
|
|
663 |
" --report_to=\"tensorboard\" \\\n",
|
664 |
" --metric_for_best_model=\"wer\" \\\n",
|
665 |
" --greater_is_better=\"False\" \\\n",
|
666 |
+
" --load_best_model_at_end=\"False\" \\\n",
|
667 |
" --gradient_checkpointing \\\n",
|
668 |
" --fp16 \\\n",
|
669 |
" --overwrite_output_dir \\\n",
|
670 |
" --do_train \\\n",
|
671 |
+
" --do_eval=\"False\" \\\n",
|
672 |
" --predict_with_generate \\\n",
|
673 |
+
" --do_normalize_eval=\"False\" \\\n",
|
674 |
" --use_auth_token \\\n",
|
675 |
" --no_streaming \\\n",
|
676 |
+
" --push_to_hub=\"True\"' >> run.sh"
|
677 |
]
|
678 |
},
|
679 |
{
|
680 |
"cell_type": "code",
|
681 |
+
"execution_count": 15,
|
682 |
"metadata": {
|
683 |
"colab": {
|
684 |
"base_uri": "https://localhost:8080/"
|
|
|
710 |
"metadata": {},
|
711 |
"outputs": [],
|
712 |
"source": [
|
713 |
+
"!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --batch_size=32 --max_eval_samples=64 --device=0 --language=\"am\""
|
714 |
]
|
715 |
},
|
716 |
{
|