Training in progress, step 3000
Note: this view is limited to 50 files because the commit contains too many changes; the remaining files are only visible in the raw diff.
- .gitattributes +1 -0
- config.json +2 -2
- inspect_dataset.py +26 -0
- model.safetensors +1 -1
- requirements.txt +11 -0
- run.sh +9 -6
- run_cv.sh +39 -0
- run_speech_recognition_seq2seq_streaming.py +144 -55
- run_speech_recognition_seq2seq_streaming_cv.py +657 -0
- tokenizer_config.json +1 -0
- training_args.bin +2 -2
- wandb/debug-internal.log +7 -0
- wandb/debug.log +25 -0
- wandb/run-20250212_121751-d4i88lzt/files/config.yaml +512 -0
- wandb/run-20250212_121751-d4i88lzt/files/output.log +22 -0
- wandb/run-20250212_121751-d4i88lzt/files/requirements.txt +115 -0
- wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json +85 -0
- wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json +1 -0
- wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log +14 -0
- wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log +15 -0
- wandb/run-20250212_121751-d4i88lzt/logs/debug.log +26 -0
- wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb +0 -0
- wandb/run-20250212_122637-v3d3ouvn/files/config.yaml +512 -0
- wandb/run-20250212_122637-v3d3ouvn/files/output.log +22 -0
- wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt +115 -0
- wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json +85 -0
- wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json +1 -0
- wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log +14 -0
- wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log +15 -0
- wandb/run-20250212_122637-v3d3ouvn/logs/debug.log +26 -0
- wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb +0 -0
- wandb/run-20250212_122854-4m048f5s/files/config.yaml +512 -0
- wandb/run-20250212_122854-4m048f5s/files/output.log +22 -0
- wandb/run-20250212_122854-4m048f5s/files/requirements.txt +115 -0
- wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json +85 -0
- wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json +1 -0
- wandb/run-20250212_122854-4m048f5s/logs/debug-core.log +14 -0
- wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log +15 -0
- wandb/run-20250212_122854-4m048f5s/logs/debug.log +26 -0
- wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb +0 -0
- wandb/run-20250212_125202-c6xjc1gs/files/config.yaml +512 -0
- wandb/run-20250212_125202-c6xjc1gs/files/output.log +22 -0
- wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt +115 -0
- wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json +85 -0
- wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json +1 -0
- wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log +14 -0
- wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log +15 -0
- wandb/run-20250212_125202-c6xjc1gs/logs/debug.log +26 -0
- wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb +0 -0
- wandb/run-20250212_125924-xhsgsxqq/files/config.yaml +512 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb filter=lfs diff=lfs merge=lfs -text
config.json
CHANGED
@@ -31,7 +31,7 @@
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.05,
-  "max_length":
+  "max_length": null,
   "max_source_positions": 1500,
   "max_target_positions": 448,
   "median_filter_width": 7,
@@ -41,7 +41,7 @@
   "pad_token_id": 50257,
   "scale_embedding": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.49.0.dev0",
   "use_cache": false,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865
inspect_dataset.py
ADDED
@@ -0,0 +1,26 @@
+from datasets import load_dataset
+import json
+
+# Load the first few examples of the dataset
+dataset = load_dataset("asierhv/composite_corpus_eu_v2.1", split="train", streaming=True)
+
+# Get the first example
+examples = []
+for i, example in enumerate(dataset):
+    if i >= 3:  # Get first 3 examples
+        break
+    examples.append(example)
+
+# Print the structure and content
+for i, example in enumerate(examples):
+    print(f"\nExample {i+1}:")
+    for key, value in example.items():
+        if key == "audio":
+            print(f"audio keys: {value.keys()}")
+            for audio_key, audio_value in value.items():
+                if isinstance(audio_value, bytes) or isinstance(audio_value, memoryview):
+                    print(f"  {audio_key}: <binary data>")
+                else:
+                    print(f"  {audio_key}: {audio_value}")
+        else:
+            print(f"{key}: {value}")
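The added inspect_dataset.py iterates the stream by hand to look at the first three examples. The same check can also be written with the streaming API's take(); a minimal sketch, assuming the asierhv/composite_corpus_eu_v2.1 corpus is reachable from your environment:

from datasets import load_dataset

# Stream the corpus and keep only the first three examples (no full download).
dataset = load_dataset("asierhv/composite_corpus_eu_v2.1", split="train", streaming=True)
for i, example in enumerate(dataset.take(3)):
    # Show the column names and the audio sub-keys without dumping raw bytes.
    print(f"Example {i + 1}: columns={list(example.keys())}")
    if isinstance(example.get("audio"), dict):
        print(f"  audio keys: {list(example['audio'].keys())}")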
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4d2fd3b1746a32b70ee58ee1a3c90a88042e6300b79bcf3fd6d5bfc260af06f0
 size 966995080
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+torch>=1.7
+torchaudio
+git+https://github.com/huggingface/transformers
+git+https://github.com/huggingface/datasets
+librosa
+jiwer
+evaluate>=0.3.0
+more-itertools
+tensorboard
+accelerate>=0.26.0
+wandb>=0.19.6
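These are the dependencies the training scripts expect; in a fresh virtual environment they can be installed with pip install -r requirements.txt. Note that the two git+ entries pull development versions of transformers and datasets straight from GitHub, so the exact versions resolved can drift between installs.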
run.sh
CHANGED
@@ -1,12 +1,12 @@
+WANDB_PROJECT=whisper-small-eu \
 python run_speech_recognition_seq2seq_streaming.py \
     --model_name_or_path="openai/whisper-small" \
-    --dataset_name="
-    --dataset_config_name="eu" \
+    --dataset_name="asierhv/composite_corpus_eu_v2.1" \
     --language="basque" \
-    --train_split_name="train
-    --eval_split_name="
+    --train_split_name="train" \
+    --eval_split_name="dev_parl+test_parl+test_cv+test_oslr" \
     --model_index_name="Whisper Small Basque" \
-    --max_steps="
+    --max_steps="8000" \
     --output_dir="./" \
     --per_device_train_batch_size="32" \
     --per_device_eval_batch_size="16" \
@@ -21,6 +21,7 @@ python run_speech_recognition_seq2seq_streaming.py \
     --generation_max_length="225" \
     --length_column_name="input_length" \
     --max_duration_in_seconds="30" \
+    --audio_column_name="audio" \
     --text_column_name="sentence" \
     --freeze_feature_encoder="False" \
     --report_to="tensorboard" \
@@ -36,4 +37,6 @@ python run_speech_recognition_seq2seq_streaming.py \
     --do_normalize_eval \
     --streaming \
     --use_auth_token \
-    --push_to_hub
+    --push_to_hub \
+    --report_to "wandb" \
+    --run_name "whisper-small-eu"
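The updated run.sh points the trainer at asierhv/composite_corpus_eu_v2.1 and passes a "+"-joined eval split (dev_parl+test_parl+test_cv+test_oslr), which the training script loads split by split and interleaves. A minimal sketch of that behaviour with the datasets library, assuming those splits stream from the Hub under exactly these names:

from datasets import interleave_datasets, load_dataset

eval_split = "dev_parl+test_parl+test_cv+test_oslr"
# Load each split lazily in streaming mode, then alternate examples between them.
streams = [
    load_dataset("asierhv/composite_corpus_eu_v2.1", split=name, streaming=True)
    for name in eval_split.split("+")
]
eval_stream = interleave_datasets(streams)
print(next(iter(eval_stream)).keys())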
run_cv.sh
ADDED
@@ -0,0 +1,39 @@
+python run_speech_recognition_seq2seq_streaming.py \
+    --model_name_or_path="openai/whisper-small" \
+    --dataset_name="mozilla-foundation/common_voice_17_0" \
+    --dataset_config_name="eu" \
+    --language="basque" \
+    --train_split_name="train+validation" \
+    --eval_split_name="test" \
+    --model_index_name="Whisper Small Basque" \
+    --max_steps="5000" \
+    --output_dir="./" \
+    --per_device_train_batch_size="32" \
+    --per_device_eval_batch_size="16" \
+    --gradient_accumulation_steps="1" \
+    --logging_steps="25" \
+    --learning_rate="1e-5" \
+    --warmup_steps="500" \
+    --evaluation_strategy="steps" \
+    --eval_steps="1000" \
+    --save_strategy="steps" \
+    --save_steps="1000" \
+    --generation_max_length="225" \
+    --length_column_name="input_length" \
+    --max_duration_in_seconds="30" \
+    --text_column_name="sentence" \
+    --freeze_feature_encoder="False" \
+    --report_to="tensorboard" \
+    --metric_for_best_model="wer" \
+    --greater_is_better="False" \
+    --load_best_model_at_end \
+    --gradient_checkpointing \
+    --fp16 \
+    --overwrite_output_dir \
+    --do_train \
+    --do_eval \
+    --predict_with_generate \
+    --do_normalize_eval \
+    --streaming \
+    --use_auth_token \
+    --push_to_hub
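run_cv.sh is the same launcher pointed at the gated mozilla-foundation/common_voice_17_0 corpus (config "eu"). A small sanity check of that split in streaming mode, assuming you have accepted the dataset's terms on the Hub and are authenticated with a Hugging Face token:

from datasets import load_dataset

# Common Voice is gated: this only works after accepting its terms and logging in.
cv_test = load_dataset("mozilla-foundation/common_voice_17_0", "eu", split="test", streaming=True)
sample = next(iter(cv_test))
print(sample["sentence"])
print(sample["audio"]["sampling_rate"])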
run_speech_recognition_seq2seq_streaming.py
CHANGED
@@ -25,6 +25,7 @@ import os
 import sys
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Union
+import numpy
 
 import datasets
 import torch
@@ -265,25 +266,58 @@ class DataCollatorSpeechSeq2SeqWithPadding:
         return batch
 
 
-def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
+def load_maybe_streaming_dataset(dataset_name, dataset_config_name, data_args, split="train", streaming=True, **kwargs):
     """
-    Utility function to load a dataset in streaming mode.
-    each split is loaded individually and then splits combined by taking alternating examples from
-    each (interleaving).
+    Utility function to load a dataset in streaming mode.
     """
+    logger.info(f"Loading dataset {dataset_name} split {split} (streaming={streaming})")
+
+    def load_single_split(split_name):
+        logger.info(f"Loading split: {split_name}")
+        ds = load_dataset(
+            dataset_name,
+            dataset_config_name,
+            split=split_name,
+            streaming=streaming,
+            trust_remote_code=True,
+            **kwargs
+        )
+
+        # Add validation transform to ensure consistent audio format
+        def validate_example(example):
+            if not isinstance(example[data_args.audio_column_name], dict):
+                example[data_args.audio_column_name] = {
+                    'array': example[data_args.audio_column_name].array,
+                    'sampling_rate': example[data_args.audio_column_name].sampling_rate,
+                    'path': getattr(example[data_args.audio_column_name], 'path', None)
+                }
+            return example
+
+        ds = ds.map(validate_example)
+
+        # Log first example structure for debugging
+        try:
+            first_example = next(iter(ds))
+            logger.info(f"First example from {split_name}:")
+            logger.info(f"  Keys: {first_example.keys()}")
+            if data_args.audio_column_name in first_example:
+                audio = first_example[data_args.audio_column_name]
+                logger.info(f"  Audio type: {type(audio)}")
+                if isinstance(audio, dict):
+                    logger.info(f"  Audio keys: {audio.keys()}")
+                    logger.info(f"  Array type: {type(audio['array']) if 'array' in audio else 'missing'}")
+        except Exception as e:
+            logger.warning(f"Could not inspect first example from {split_name}: {e}")
+
+        return ds
+
     if "+" in split:
-        #
-        dataset_splits = [
-
-
-        ]
-        # interleave multiple splits to form one dataset
-        interleaved_dataset = interleave_datasets(dataset_splits)
-        return interleaved_dataset
+        # Load and validate each split individually
+        dataset_splits = [load_single_split(split_name) for split_name in split.split("+")]
+        # Interleave datasets
+        return interleave_datasets(dataset_splits)
     else:
-
-        dataset = load_dataset(dataset_name, dataset_config_name, split=split, streaming=streaming, trust_remote_code=True, **kwargs)
-        return dataset
+        return load_single_split(split)
 
 
 def main():
@@ -356,37 +390,39 @@ def main():
         raw_datasets["train"] = load_maybe_streaming_dataset(
             data_args.dataset_name,
             data_args.dataset_config_name,
+            data_args,
             split=data_args.train_split_name,
-            # xezpeleta
-            #use_auth_token=True if model_args.use_auth_token else None,
             streaming=data_args.streaming,
         )
 
+        # Get features from train dataset since it's guaranteed to exist if do_train is True
+        train_dataset = raw_datasets["train"]
+        first_example = next(iter(train_dataset))
+        raw_datasets_features = list(first_example.keys())
+
+        if data_args.audio_column_name not in raw_datasets_features:
+            raise ValueError(
+                f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
+                "Make sure to set `--audio_column_name` to the correct audio column - one of "
+                f"{', '.join(raw_datasets_features)}."
+            )
+
+        if data_args.text_column_name not in raw_datasets_features:
+            raise ValueError(
+                f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
+                "Make sure to set `--text_column_name` to the correct text column - one of "
+                f"{', '.join(raw_datasets_features)}."
+            )
+
     if training_args.do_eval:
         raw_datasets["eval"] = load_maybe_streaming_dataset(
             data_args.dataset_name,
            data_args.dataset_config_name,
+            data_args,
             split=data_args.eval_split_name,
-            #use_auth_token=True if model_args.use_auth_token else None,
             streaming=data_args.streaming,
         )
 
-    raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())
-
-    if data_args.audio_column_name not in raw_datasets_features:
-        raise ValueError(
-            f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
-            "Make sure to set `--audio_column_name` to the correct audio column - one of "
-            f"{', '.join(raw_datasets_features)}."
-        )
-
-    if data_args.text_column_name not in raw_datasets_features:
-        raise ValueError(
-            f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
-            "Make sure to set `--text_column_name` to the correct text column - one of "
-            f"{', '.join(raw_datasets_features)}."
-        )
-
     # 5. Load pretrained model, tokenizer, and feature extractor
     #
     # Distributed training:
@@ -438,14 +474,12 @@
         tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
 
     # 6. Resample speech dataset if necessary
-
-
-
-        data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
-    )
+    # For streaming datasets with audio bytes, sampling rate is handled in prepare_dataset
+    logger.info("Using feature extractor sampling rate: %d", feature_extractor.sampling_rate)
+    dataset_sampling_rate = feature_extractor.sampling_rate
 
     # 7. Preprocessing the datasets.
-
+    logger.info("Starting dataset preprocessing")
     max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
     min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
     audio_column_name = data_args.audio_column_name
@@ -469,20 +503,59 @@
             else raw_datasets["eval"].select(range(data_args.max_eval_samples))
         )
 
+    # Inspect dataset before processing
+    for split, dataset in raw_datasets.items():
+        try:
+            first_example = next(iter(dataset))
+            logger.info(f"First example from {split} before processing:")
+            logger.info(f"Keys: {first_example.keys()}")
+            if audio_column_name in first_example:
+                audio_data = first_example[audio_column_name]
+                logger.info(f"Audio column type: {type(audio_data)}")
+                if isinstance(audio_data, dict):
+                    logger.info(f"Audio keys: {audio_data.keys()}")
+        except Exception as e:
+            logger.warning(f"Could not inspect first example from {split}: {e}")
+
     def prepare_dataset(batch):
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            # Validate audio format
+            audio = batch[audio_column_name]
+
+            # Load audio from bytes if needed
+            if isinstance(audio, dict) and 'bytes' in audio:
+                import io
+                import soundfile as sf
+                audio_bytes = io.BytesIO(audio['bytes'])
+                audio_array, sampling_rate = sf.read(audio_bytes)
+                audio = {'array': audio_array, 'sampling_rate': sampling_rate}
+
+            # Process audio through feature extractor
+            inputs = feature_extractor(audio['array'], sampling_rate=audio['sampling_rate'])
+            batch["input_length"] = len(audio['array'])
+            batch[model_input_name] = inputs[model_input_name][0]
+
+            # Process text
+            input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
+            if do_remove_punctuation:
+                input_str = normalizer(input_str).strip()
+            batch["labels"] = tokenizer(input_str).input_ids
+
+            return batch
+
+        except Exception as e:
+            logger.error(f"Error processing batch in prepare_dataset:")
+            logger.error(f"  Error type: {type(e).__name__}")
+            logger.error(f"  Error message: {str(e)}")
+            logger.error(f"  Batch keys: {list(batch.keys())}")
+            if audio_column_name in batch:
+                audio_data = batch[audio_column_name]
+                logger.error(f"  Audio type: {type(audio_data)}")
+                if isinstance(audio_data, dict):
+                    logger.error(f"  Audio keys: {list(audio_data.keys())}")
+                elif hasattr(audio_data, '__dict__'):
+                    logger.error(f"  Audio attributes: {dir(audio_data)}")
+            raise
 
     with training_args.main_process_first(desc="dataset map pre-processing"):
         vectorized_datasets = raw_datasets.map(
@@ -490,6 +563,16 @@
             remove_columns=raw_datasets_features,
         ).with_format("torch")
 
+    # Inspect vectorized dataset
+    for split, dataset in vectorized_datasets.items():
+        try:
+            first_example = next(iter(dataset))
+            logger.info(f"First example from {split} after processing:")
+            logger.info(f"Keys: {first_example.keys()}")
+            logger.info(f"Types: {', '.join(f'{k}: {type(v)}' for k, v in first_example.items())}")
+        except Exception as e:
+            logger.warning(f"Could not inspect first example from vectorized {split}: {e}")
+
     if training_args.do_train and data_args.streaming:
         # manually shuffle if streaming (done by the trainer for non-streaming)
         vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
@@ -551,7 +634,13 @@
     # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
     # Only required for streaming: Trainer automatically shuffles non-streaming datasets
     class ShuffleCallback(TrainerCallback):
-        def
+        def on_train_begin(self, args, state, control, **kwargs):
+            self.trainer = kwargs.get('trainer')
+
+        def on_epoch_begin(self, args, state, control, **kwargs):
+            if not hasattr(self, "trainer") or not hasattr(self.trainer, "train_dataloader") or self.trainer.train_dataloader is None:
+                return
+            train_dataloader = self.trainer.train_dataloader
             if isinstance(train_dataloader.dataset, IterableDatasetShard):
                 pass  # set_epoch() is handled by the Trainer
             elif isinstance(train_dataloader.dataset, IterableDataset):
@@ -563,7 +652,7 @@
         args=training_args,
         train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
         eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
-
+        processing_class=feature_extractor,
         data_collator=data_collator,
         compute_metrics=compute_metrics if training_args.predict_with_generate else None,
         callbacks=[ShuffleCallback()] if data_args.streaming else None,
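The reworked prepare_dataset above falls back to decoding raw audio bytes with soundfile when a streamed example arrives as a dict holding 'bytes' rather than a decoded array. A standalone sketch of just that decoding path, assuming soundfile is installed and using a hypothetical local WAV file in place of the streamed bytes:

import io

import soundfile as sf


def decode_audio_bytes(audio_bytes: bytes) -> dict:
    # Decode in-memory audio bytes into a float array plus its sampling rate.
    audio_array, sampling_rate = sf.read(io.BytesIO(audio_bytes))
    return {"array": audio_array, "sampling_rate": sampling_rate}


# "example.wav" is a stand-in for the bytes field of a streamed example.
with open("example.wav", "rb") as f:
    audio = decode_audio_bytes(f.read())
print(audio["array"].shape, audio["sampling_rate"])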
run_speech_recognition_seq2seq_streaming_cv.py
ADDED
@@ -0,0 +1,657 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding=utf-8
|
3 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
4 |
+
#
|
5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6 |
+
# you may not use this file except in compliance with the License.
|
7 |
+
# You may obtain a copy of the License at
|
8 |
+
#
|
9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10 |
+
#
|
11 |
+
# Unless required by applicable law or agreed to in writing, software
|
12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
+
# See the License for the specific language governing permissions and
|
15 |
+
# limitations under the License.
|
16 |
+
"""
|
17 |
+
Fine-tuning the library models for sequence to sequence speech recognition
|
18 |
+
with 🤗 Datasets' streaming mode.
|
19 |
+
"""
|
20 |
+
# You can also adapt this script for your own sequence to sequence speech
|
21 |
+
# recognition task. Pointers for this are left as comments.
|
22 |
+
|
23 |
+
import logging
|
24 |
+
import os
|
25 |
+
import sys
|
26 |
+
from dataclasses import dataclass, field
|
27 |
+
from typing import Any, Dict, List, Optional, Union
|
28 |
+
import numpy
|
29 |
+
|
30 |
+
import datasets
|
31 |
+
import torch
|
32 |
+
from datasets import DatasetDict, IterableDatasetDict, interleave_datasets, load_dataset
|
33 |
+
from torch.utils.data import IterableDataset
|
34 |
+
|
35 |
+
import evaluate
|
36 |
+
import transformers
|
37 |
+
from transformers import (
|
38 |
+
AutoConfig,
|
39 |
+
AutoFeatureExtractor,
|
40 |
+
AutoModelForSpeechSeq2Seq,
|
41 |
+
AutoProcessor,
|
42 |
+
AutoTokenizer,
|
43 |
+
HfArgumentParser,
|
44 |
+
Seq2SeqTrainer,
|
45 |
+
Seq2SeqTrainingArguments,
|
46 |
+
TrainerCallback,
|
47 |
+
set_seed,
|
48 |
+
)
|
49 |
+
from transformers.models.whisper.english_normalizer import BasicTextNormalizer
|
50 |
+
from transformers.trainer_pt_utils import IterableDatasetShard
|
51 |
+
from transformers.trainer_utils import get_last_checkpoint, is_main_process
|
52 |
+
from transformers.utils import check_min_version, send_example_telemetry
|
53 |
+
from transformers.utils.versions import require_version
|
54 |
+
|
55 |
+
|
56 |
+
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
57 |
+
check_min_version("4.25.0.dev0")
|
58 |
+
|
59 |
+
require_version("datasets>=1.18.2", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
|
60 |
+
|
61 |
+
logger = logging.getLogger(__name__)
|
62 |
+
|
63 |
+
|
64 |
+
@dataclass
|
65 |
+
class ModelArguments:
|
66 |
+
"""
|
67 |
+
Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
|
68 |
+
"""
|
69 |
+
|
70 |
+
model_name_or_path: str = field(
|
71 |
+
metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
|
72 |
+
)
|
73 |
+
config_name: Optional[str] = field(
|
74 |
+
default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
|
75 |
+
)
|
76 |
+
tokenizer_name: Optional[str] = field(
|
77 |
+
default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
|
78 |
+
)
|
79 |
+
feature_extractor_name: Optional[str] = field(
|
80 |
+
default=None, metadata={"help": "feature extractor name or path if not the same as model_name"}
|
81 |
+
)
|
82 |
+
cache_dir: Optional[str] = field(
|
83 |
+
default=None,
|
84 |
+
metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
|
85 |
+
)
|
86 |
+
use_fast_tokenizer: bool = field(
|
87 |
+
default=True,
|
88 |
+
metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
|
89 |
+
)
|
90 |
+
model_revision: str = field(
|
91 |
+
default="main",
|
92 |
+
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
|
93 |
+
)
|
94 |
+
use_auth_token: bool = field(
|
95 |
+
default=False,
|
96 |
+
metadata={
|
97 |
+
"help": (
|
98 |
+
"Will use the token generated when running `huggingface-cli login` (necessary to use this script "
|
99 |
+
"with private models)."
|
100 |
+
)
|
101 |
+
},
|
102 |
+
)
|
103 |
+
freeze_feature_encoder: bool = field(
|
104 |
+
default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
|
105 |
+
)
|
106 |
+
freeze_encoder: bool = field(
|
107 |
+
default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
|
108 |
+
)
|
109 |
+
forced_decoder_ids: List[List[int]] = field(
|
110 |
+
default=None,
|
111 |
+
metadata={
|
112 |
+
"help": (
|
113 |
+
"A list of pairs of integers which indicates a mapping from generation indices to token indices "
|
114 |
+
"that will be forced before sampling. For example, [[0, 123]] means the first generated token "
|
115 |
+
"will always be a token of index 123."
|
116 |
+
)
|
117 |
+
},
|
118 |
+
)
|
119 |
+
suppress_tokens: List[int] = field(
|
120 |
+
default=None, metadata={"help": "A list of tokens that will be suppressed at generation."}
|
121 |
+
)
|
122 |
+
model_index_name: str = field(default=None, metadata={"help": "Pretty name for the model card."})
|
123 |
+
|
124 |
+
|
125 |
+
@dataclass
|
126 |
+
class DataTrainingArguments:
|
127 |
+
"""
|
128 |
+
Arguments pertaining to what data we are going to input our model for training and eval.
|
129 |
+
"""
|
130 |
+
|
131 |
+
dataset_name: str = field(
|
132 |
+
default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
|
133 |
+
)
|
134 |
+
dataset_config_name: Optional[str] = field(
|
135 |
+
default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
|
136 |
+
)
|
137 |
+
text_column: Optional[str] = field(
|
138 |
+
default=None,
|
139 |
+
metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
|
140 |
+
)
|
141 |
+
max_train_samples: Optional[int] = field(
|
142 |
+
default=None,
|
143 |
+
metadata={
|
144 |
+
"help": (
|
145 |
+
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
146 |
+
"value if set."
|
147 |
+
)
|
148 |
+
},
|
149 |
+
)
|
150 |
+
max_eval_samples: Optional[int] = field(
|
151 |
+
default=None,
|
152 |
+
metadata={
|
153 |
+
"help": (
|
154 |
+
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
155 |
+
"value if set."
|
156 |
+
)
|
157 |
+
},
|
158 |
+
)
|
159 |
+
audio_column_name: str = field(
|
160 |
+
default="audio",
|
161 |
+
metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
|
162 |
+
)
|
163 |
+
text_column_name: str = field(
|
164 |
+
default="text",
|
165 |
+
metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
|
166 |
+
)
|
167 |
+
max_duration_in_seconds: float = field(
|
168 |
+
default=20.0,
|
169 |
+
metadata={
|
170 |
+
"help": (
|
171 |
+
"Truncate audio files that are longer than `max_duration_in_seconds` seconds to"
|
172 |
+
" 'max_duration_in_seconds`"
|
173 |
+
)
|
174 |
+
},
|
175 |
+
)
|
176 |
+
min_duration_in_seconds: float = field(
|
177 |
+
default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
|
178 |
+
)
|
179 |
+
train_split_name: str = field(
|
180 |
+
default="train",
|
181 |
+
metadata={
|
182 |
+
"help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
|
183 |
+
},
|
184 |
+
)
|
185 |
+
eval_split_name: str = field(
|
186 |
+
default="test",
|
187 |
+
metadata={
|
188 |
+
"help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
|
189 |
+
},
|
190 |
+
)
|
191 |
+
do_lower_case: bool = field(
|
192 |
+
default=False,
|
193 |
+
metadata={"help": "Whether the target text should be lower cased."},
|
194 |
+
)
|
195 |
+
do_remove_punctuation: bool = field(
|
196 |
+
default=False,
|
197 |
+
metadata={"help": "Whether the target text should be striped of punctuation."},
|
198 |
+
)
|
199 |
+
do_normalize_eval: bool = field(
|
200 |
+
default=True,
|
201 |
+
metadata={"help": "Whether to normalise the references and predictions in the eval WER calculation."},
|
202 |
+
)
|
203 |
+
language: str = field(
|
204 |
+
default=None,
|
205 |
+
metadata={
|
206 |
+
"help": (
|
207 |
+
"Language for multilingual fine-tuning. This argument should be set for multilingual fine-tuning "
|
208 |
+
"only. For English speech recognition, it should be set to `None`."
|
209 |
+
)
|
210 |
+
},
|
211 |
+
)
|
212 |
+
task: str = field(
|
213 |
+
default="transcribe",
|
214 |
+
metadata={"help": "Task, either `transcribe` for speech recognition or `translate` for speech translation."},
|
215 |
+
)
|
216 |
+
shuffle_buffer_size: Optional[int] = field(
|
217 |
+
default=500,
|
218 |
+
metadata={
|
219 |
+
"help": (
|
220 |
+
"The number of streamed examples to download before shuffling them. The large the buffer, "
|
221 |
+
"the closer it is to real offline shuffling."
|
222 |
+
)
|
223 |
+
},
|
224 |
+
)
|
225 |
+
streaming: bool = field(
|
226 |
+
default=True,
|
227 |
+
metadata={"help": "Whether to use streaming mode to load and pre-process the data."},
|
228 |
+
)
|
229 |
+
|
230 |
+
|
231 |
+
@dataclass
|
232 |
+
class DataCollatorSpeechSeq2SeqWithPadding:
|
233 |
+
"""
|
234 |
+
Data collator that will dynamically pad the inputs received.
|
235 |
+
Args:
|
236 |
+
processor ([`WhisperProcessor`])
|
237 |
+
The processor used for processing the data.
|
238 |
+
decoder_start_token_id (`int`)
|
239 |
+
The begin-of-sentence of the decoder.
|
240 |
+
"""
|
241 |
+
|
242 |
+
processor: Any
|
243 |
+
decoder_start_token_id: int
|
244 |
+
|
245 |
+
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
246 |
+
# split inputs and labels since they have to be of different lengths and need
|
247 |
+
# different padding methods
|
248 |
+
model_input_name = self.processor.model_input_names[0]
|
249 |
+
input_features = [{model_input_name: feature[model_input_name]} for feature in features]
|
250 |
+
label_features = [{"input_ids": feature["labels"]} for feature in features]
|
251 |
+
|
252 |
+
batch = self.processor.feature_extractor.pad(input_features, return_tensors="pt")
|
253 |
+
|
254 |
+
labels_batch = self.processor.tokenizer.pad(label_features, return_tensors="pt")
|
255 |
+
|
256 |
+
# replace padding with -100 to ignore loss correctly
|
257 |
+
labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
|
258 |
+
|
259 |
+
# if bos token is appended in previous tokenization step,
|
260 |
+
# cut bos token here as it's append later anyways
|
261 |
+
if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item():
|
262 |
+
labels = labels[:, 1:]
|
263 |
+
|
264 |
+
batch["labels"] = labels
|
265 |
+
|
266 |
+
return batch
|
267 |
+
|
268 |
+
|
269 |
+
def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
|
270 |
+
"""
|
271 |
+
Utility function to load a dataset in streaming mode. For datasets with multiple splits,
|
272 |
+
each split is loaded individually and then splits combined by taking alternating examples from
|
273 |
+
each (interleaving).
|
274 |
+
"""
|
275 |
+
if ("+" in split):
|
276 |
+
# load multiple splits separated by the `+` symbol with streaming mode
|
277 |
+
dataset_splits = [
|
278 |
+
load_dataset(dataset_name, dataset_config_name, split=split_name, streaming=streaming, trust_remote_code=True, **kwargs)
|
279 |
+
for split_name in split.split("+")
|
280 |
+
]
|
281 |
+
# interleave multiple splits to form one dataset
|
282 |
+
interleaved_dataset = interleave_datasets(dataset_splits)
|
283 |
+
return interleaved_dataset
|
284 |
+
else:
|
285 |
+
# load a single split *with* streaming mode
|
286 |
+
dataset = load_dataset(dataset_name, dataset_config_name, split=split, streaming=streaming, trust_remote_code=True, **kwargs)
|
287 |
+
return dataset
|
288 |
+
|
289 |
+
|
290 |
+
def main():
|
291 |
+
# 1. Parse input arguments
|
292 |
+
# See all possible arguments in src/transformers/training_args.py
|
293 |
+
# or by passing the --help flag to this script.
|
294 |
+
# We now keep distinct sets of args, for a cleaner separation of concerns.
|
295 |
+
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments))
|
296 |
+
|
297 |
+
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
298 |
+
# If we pass only one argument to the script and it's the path to a json file,
|
299 |
+
# let's parse it to get our arguments.
|
300 |
+
model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
|
301 |
+
else:
|
302 |
+
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
|
303 |
+
|
304 |
+
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
|
305 |
+
# information sent is the one passed as arguments along with your Python/PyTorch versions.
|
306 |
+
send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)
|
307 |
+
|
308 |
+
# 2. Setup logging
|
309 |
+
logging.basicConfig(
|
310 |
+
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
|
311 |
+
datefmt="%m/%d/%Y %H:%M:%S",
|
312 |
+
handlers=[logging.StreamHandler(sys.stdout)],
|
313 |
+
)
|
314 |
+
log_level = training_args.get_process_log_level()
|
315 |
+
logger.setLevel(log_level)
|
316 |
+
datasets.utils.logging.set_verbosity(log_level)
|
317 |
+
transformers.utils.logging.set_verbosity(log_level)
|
318 |
+
transformers.utils.logging.enable_default_handler()
|
319 |
+
transformers.utils.logging.enable_explicit_format()
|
320 |
+
|
321 |
+
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
|
322 |
+
|
323 |
+
# Log on each process the small summary:
|
324 |
+
logger.warning(
|
325 |
+
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
|
326 |
+
f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
|
327 |
+
)
|
328 |
+
logger.info(f"Training/evaluation parameters {training_args}")
|
329 |
+
|
330 |
+
# Set the verbosity to info of the Transformers logger (on main process only):
|
331 |
+
if is_main_process(training_args.local_rank):
|
332 |
+
transformers.utils.logging.set_verbosity_info()
|
333 |
+
logger.info("Training/evaluation parameters %s", training_args)
|
334 |
+
|
335 |
+
# 3. Detecting last checkpoint and eventually continue from last checkpoint
|
336 |
+
last_checkpoint = None
|
337 |
+
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
|
338 |
+
last_checkpoint = get_last_checkpoint(training_args.output_dir)
|
339 |
+
if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
|
340 |
+
raise ValueError(
|
341 |
+
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
342 |
+
"Use --overwrite_output_dir to overcome."
|
343 |
+
)
|
344 |
+
elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
|
345 |
+
logger.info(
|
346 |
+
f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
|
347 |
+
"the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
|
348 |
+
)
|
349 |
+
|
350 |
+
# Set seed before initializing model.
|
351 |
+
set_seed(training_args.seed)
|
352 |
+
|
353 |
+
# 4. Load dataset
|
354 |
+
raw_datasets = IterableDatasetDict() if data_args.streaming else DatasetDict()
|
355 |
+
|
356 |
+
if training_args.do_train:
|
357 |
+
raw_datasets["train"] = load_maybe_streaming_dataset(
|
358 |
+
data_args.dataset_name,
|
359 |
+
data_args.dataset_config_name,
|
360 |
+
split=data_args.train_split_name,
|
361 |
+
#use_auth_token=True if model_args.use_auth_token else None,
|
362 |
+
streaming=data_args.streaming,
|
363 |
+
)
|
364 |
+
|
365 |
+
if training_args.do_eval:
|
366 |
+
raw_datasets["eval"] = load_maybe_streaming_dataset(
|
367 |
+
data_args.dataset_name,
|
368 |
+
data_args.dataset_config_name,
|
369 |
+
split=data_args.eval_split_name,
|
370 |
+
#use_auth_token=True if model_args.use_auth_token else None,
|
371 |
+
streaming=data_args.streaming,
|
372 |
+
)
|
373 |
+
|
374 |
+
raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())
|
375 |
+
|
376 |
+
if data_args.audio_column_name not in raw_datasets_features:
|
377 |
+
raise ValueError(
|
378 |
+
f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
|
379 |
+
"Make sure to set `--audio_column_name` to the correct audio column - one of "
|
380 |
+
f"{', '.join(raw_datasets_features)}."
|
381 |
+
)
|
382 |
+
|
383 |
+
if data_args.text_column_name not in raw_datasets_features:
|
384 |
+
raise ValueError(
|
385 |
+
f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
|
386 |
+
"Make sure to set `--text_column_name` to the correct text column - one of "
|
387 |
+
f"{', '.join(raw_datasets_features)}."
|
388 |
+
)
|
389 |
+
|
390 |
+
# 5. Load pretrained model, tokenizer, and feature extractor
|
391 |
+
#
|
392 |
+
# Distributed training:
|
393 |
+
# The .from_pretrained methods guarantee that only one local process can concurrently
|
394 |
+
config = AutoConfig.from_pretrained(
|
395 |
+
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
|
396 |
+
cache_dir=model_args.cache_dir,
|
397 |
+
revision=model_args.model_revision,
|
398 |
+
use_auth_token=True if model_args.use_auth_token else None,
|
399 |
+
)
|
400 |
+
|
401 |
+
config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens})
|
402 |
+
|
403 |
+
if training_args.gradient_checkpointing:
|
404 |
+
config.update({"use_cache": False})
|
405 |
+
|
406 |
+
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
407 |
+
model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
|
408 |
+
cache_dir=model_args.cache_dir,
|
409 |
+
revision=model_args.model_revision,
|
410 |
+
use_auth_token=True if model_args.use_auth_token else None,
|
411 |
+
)
|
412 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
413 |
+
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
|
414 |
+
cache_dir=model_args.cache_dir,
|
415 |
+
use_fast=model_args.use_fast_tokenizer,
|
416 |
+
revision=model_args.model_revision,
|
417 |
+
use_auth_token=True if model_args.use_auth_token else None,
|
418 |
+
)
|
419 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
420 |
+
model_args.model_name_or_path,
|
421 |
+
config=config,
|
422 |
+
cache_dir=model_args.cache_dir,
|
423 |
+
revision=model_args.model_revision,
|
424 |
+
use_auth_token=True if model_args.use_auth_token else None,
|
425 |
+
)
|
426 |
+
|
427 |
+
if model.config.decoder_start_token_id is None:
|
428 |
+
raise ValueError("Make sure that `config.decoder_start_token_id` is correctly defined")
|
429 |
+
|
430 |
+
if model_args.freeze_feature_encoder:
|
431 |
+
model.freeze_feature_encoder()
|
432 |
+
|
433 |
+
if model_args.freeze_encoder:
|
434 |
+
model.freeze_encoder()
|
435 |
+
|
436 |
+
if data_args.language is not None:
|
437 |
+
# We only need to set the task id when the language is specified (i.e. in a multilingual setting)
|
438 |
+
tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
|
439 |
+
|
440 |
+
# 6. Resample speech dataset if necessary
|
441 |
+
dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
|
442 |
+
if dataset_sampling_rate != feature_extractor.sampling_rate:
|
443 |
+
raw_datasets = raw_datasets.cast_column(
|
444 |
+
data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
|
445 |
+
)
|
446 |
+
|
447 |
+
# 7. Preprocessing the datasets.
|
448 |
+
# We need to read the audio files as arrays and tokenize the targets.
|
449 |
+
max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
|
450 |
+
min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
|
451 |
+
audio_column_name = data_args.audio_column_name
|
452 |
+
text_column_name = data_args.text_column_name
|
453 |
+
model_input_name = feature_extractor.model_input_names[0]
|
454 |
+
do_lower_case = data_args.do_lower_case
|
455 |
+
do_remove_punctuation = data_args.do_remove_punctuation
|
456 |
+
normalizer = BasicTextNormalizer() # 'official' text normalizer from OpenAI
|
457 |
+
|
458 |
+
if data_args.max_train_samples is not None:
|
459 |
+
raw_datasets["train"] = (
|
460 |
+
raw_datasets["train"].take(data_args.max_train_samples)
|
461 |
+
if data_args.streaming
|
462 |
+
else raw_datasets["train"].select(range(data_args.max_train_samples))
|
463 |
+
)
|
464 |
+
|
465 |
+
if data_args.max_eval_samples is not None:
|
466 |
+
raw_datasets["eval"] = (
|
467 |
+
raw_datasets["eval"].take(data_args.max_eval_samples)
|
468 |
+
if data_args.streaming
|
469 |
+
else raw_datasets["eval"].select(range(data_args.max_eval_samples))
|
470 |
+
)
|
471 |
+
|
472 |
+
def prepare_dataset(batch):
|
473 |
+
# process audio
|
474 |
+
sample = batch[audio_column_name]
|
475 |
+
|
476 |
+
# Handle different audio formats - some datasets provide raw arrays, others provide paths
|
477 |
+
if isinstance(sample, dict):
|
478 |
+
if "array" in sample:
|
479 |
+
audio_array = sample["array"]
|
480 |
+
sampling_rate = sample["sampling_rate"]
|
481 |
+
elif "path" in sample:
|
482 |
+
# Load from path if array is not available
|
483 |
+
audio_array = sample["path"] # datasets will load the file for us
|
484 |
+
sampling_rate = sample.get("sampling_rate", feature_extractor.sampling_rate)
|
485 |
+
else:
|
486 |
+
raise ValueError(f"Unsupported audio format. Sample must contain either 'array' or 'path'. Got {sample.keys()}")
|
487 |
+
else:
|
488 |
+
# Assume it's a direct path or array
|
489 |
+
audio_array = sample
|
490 |
+
sampling_rate = feature_extractor.sampling_rate
|
491 |
+
|
492 |
+
inputs = feature_extractor(audio_array, sampling_rate=sampling_rate)
|
493 |
+
|
494 |
+
# process audio length
|
495 |
+
if isinstance(audio_array, numpy.ndarray):
|
496 |
+
batch["input_length"] = len(audio_array)
|
497 |
+
else:
|
498 |
+
# If we couldn't get the direct array length, estimate it from the processed features
|
499 |
+
batch["input_length"] = inputs.get(model_input_name)[0].shape[0] * feature_extractor.hop_length
|
500 |
+
|
501 |
+
# process targets
|
502 |
+
input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
|
503 |
+
if do_remove_punctuation:
|
504 |
+
input_str = normalizer(input_str).strip()
|
505 |
+
batch["labels"] = tokenizer(input_str).input_ids
|
506 |
+
return batch
|
507 |
+
|
508 |
+
with training_args.main_process_first(desc="dataset map pre-processing"):
|
509 |
+
vectorized_datasets = raw_datasets.map(
|
510 |
+
prepare_dataset,
|
511 |
+
remove_columns=raw_datasets_features,
|
512 |
+
).with_format("torch")
|
513 |
+
|
514 |
+
if training_args.do_train and data_args.streaming:
|
515 |
+
# manually shuffle if streaming (done by the trainer for non-streaming)
|
516 |
+
vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
|
517 |
+
buffer_size=data_args.shuffle_buffer_size,
|
518 |
+
seed=training_args.seed,
|
519 |
+
)
|
520 |
+
|
521 |
+
# filter training data that is shorter than min_input_length or longer than
|
522 |
+
# max_input_length
|
523 |
+
def is_audio_in_length_range(length):
|
524 |
+
return min_input_length < length < max_input_length
|
525 |
+
|
526 |
+
if training_args.do_train:
|
527 |
+
vectorized_datasets["train"] = vectorized_datasets["train"].filter(
|
528 |
+
is_audio_in_length_range,
|
529 |
+
input_columns=["input_length"],
|
530 |
+
)
|
531 |
+
|
532 |
+
# 8. Load Metric
|
533 |
+
metric = evaluate.load("wer")
|
534 |
+
do_normalize_eval = data_args.do_normalize_eval
|
535 |
+
|
536 |
+
def compute_metrics(pred):
|
537 |
+
pred_ids = pred.predictions
|
538 |
+
|
539 |
+
pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id
|
540 |
+
|
541 |
+
pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
|
542 |
+
# we do not want to group tokens when computing the metrics
|
543 |
+
label_str = tokenizer.batch_decode(pred.label_ids, skip_special_tokens=True)
|
544 |
+
|
545 |
+
if do_normalize_eval:
|
546 |
+
pred_str = [normalizer(pred) for pred in pred_str]
|
547 |
+
label_str = [normalizer(label) for label in label_str]
|
548 |
+
# filtering step to only evaluate the samples that correspond to non-zero references:
|
549 |
+
pred_str = [pred_str[i] for i in range(len(pred_str)) if len(label_str[i]) > 0]
|
550 |
+
        label_str = [label_str[i] for i in range(len(label_str)) if len(label_str[i]) > 0]

        wer = 100 * metric.compute(predictions=pred_str, references=label_str)

        return {"wer": wer}

    # 9. Create a single speech processor
    if is_main_process(training_args.local_rank):
        # save feature extractor, tokenizer and config
        feature_extractor.save_pretrained(training_args.output_dir)
        tokenizer.save_pretrained(training_args.output_dir)
        config.save_pretrained(training_args.output_dir)

    processor = AutoProcessor.from_pretrained(training_args.output_dir)

    # 10. Define data collator
    data_collator = DataCollatorSpeechSeq2SeqWithPadding(
        processor=processor,
        decoder_start_token_id=model.config.decoder_start_token_id,
    )

    # 11. Configure Trainer
    # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
    # Only required for streaming: Trainer automatically shuffles non-streaming datasets
    class ShuffleCallback(TrainerCallback):
        def on_train_begin(self, args, state, control, **kwargs):
            self.trainer = kwargs.get('trainer')

        def on_epoch_begin(self, args, state, control, **kwargs):
            if not hasattr(self, "trainer") or not hasattr(self.trainer, "train_dataloader") or self.trainer.train_dataloader is None:
                return
            train_dataloader = self.trainer.train_dataloader
            if isinstance(train_dataloader.dataset, IterableDatasetShard):
                pass  # set_epoch() is handled by the Trainer
            elif isinstance(train_dataloader.dataset, IterableDataset):
                train_dataloader.dataset.set_epoch(train_dataloader.dataset._epoch + 1)

    # Initialize Trainer
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
        eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
        processing_class=feature_extractor,
        data_collator=data_collator,
        compute_metrics=compute_metrics if training_args.predict_with_generate else None,
        callbacks=[ShuffleCallback()] if data_args.streaming else None,
    )

    # 12. Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()  # Saves the feature extractor too for easy upload

        metrics = train_result.metrics
        if data_args.max_train_samples:
            metrics["train_samples"] = data_args.max_train_samples
        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # 13. Evaluation
    results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate(
            metric_key_prefix="eval",
            max_length=training_args.generation_max_length,
            num_beams=training_args.generation_num_beams,
        )
        if data_args.max_eval_samples:
            metrics["eval_samples"] = data_args.max_eval_samples

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # 14. Write Training Stats
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "automatic-speech-recognition",
        "tags": "whisper-event",
    }
    if data_args.dataset_name is not None:
        kwargs["dataset_tags"] = data_args.dataset_name
        if data_args.dataset_config_name is not None:
            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
        else:
            kwargs["dataset"] = data_args.dataset_name
        if "common_voice" in data_args.dataset_name:
            kwargs["language"] = data_args.dataset_config_name.split('-')[0]
        if model_args.model_index_name is not None:
            kwargs["model_name"] = model_args.model_index_name

    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)

    return results


if __name__ == "__main__":
    main()
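For reference, the compute_metrics block that closes the script above reduces to a plain word-error-rate calculation over decoded strings. A minimal standalone sketch of that step, assuming the evaluate/jiwer packages pinned in requirements.txt below and two hypothetical transcript lists that are not project data:

import evaluate

metric = evaluate.load("wer")

# Hypothetical decoded predictions and references, for illustration only.
pred_str = ["kaixo mundua", "egun on denoi"]
label_str = ["kaixo mundua", "egun on guztioi"]

# Drop empty references before scoring, mirroring the filtering in the script.
pairs = [(p, l) for p, l in zip(pred_str, label_str) if len(l) > 0]
pred_str = [p for p, _ in pairs]
label_str = [l for _, l in pairs]

wer = 100 * metric.compute(predictions=pred_str, references=label_str)
print({"wer": wer})  # 20.0 here: one substituted word over five reference words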
tokenizer_config.json
CHANGED
@@ -12980,6 +12980,7 @@
 "clean_up_tokenization_spaces": true,
 "eos_token": "<|endoftext|>",
 "errors": "replace",
+"extra_special_tokens": {},
 "model_max_length": 1024,
 "pad_token": "<|endoftext|>",
 "processor_class": "WhisperProcessor",
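As a quick sanity check on the updated tokenizer files, the saved processor can be reloaded from the training output directory; the printed values come straight from the tokenizer_config.json shown above (the snippet is illustrative, not part of the commit):

from transformers import WhisperProcessor

# "./" is the output_dir used by the training runs in this repo.
processor = WhisperProcessor.from_pretrained("./")
print(processor.tokenizer.model_max_length)  # 1024
print(processor.tokenizer.pad_token)         # <|endoftext|>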
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:37d41e6c93c9164dab27b25a4957996293e07bbed9895811c22360ffbda7ebbf
+size 5432
wandb/debug-internal.log
ADDED
@@ -0,0 +1,7 @@
{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
wandb/debug.log
ADDED
@@ -0,0 +1,25 @@
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():756] calling init triggers
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
config: {'_wandb': {}}
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():789] starting backend
2025-02-12 15:27:10,107 INFO MainThread:243546 [wandb_init.py:init():793] sending inform_init request
2025-02-12 15:27:10,112 INFO MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
2025-02-12 15:27:10,113 INFO MainThread:243546 [wandb_init.py:init():808] backend started and connected
2025-02-12 15:27:10,115 INFO MainThread:243546 [wandb_init.py:init():901] updated telemetry
2025-02-12 15:27:10,122 INFO MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
2025-02-12 15:27:10,584 INFO MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
2025-02-12 15:27:10,691 INFO MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
2025-02-12 15:27:10,694 INFO MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
2025-02-12 15:27:10,698 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 
'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
wandb/run-20250212_121751-d4i88lzt/files/config.yaml
ADDED
@@ -0,0 +1,512 @@
[config.yaml — 512-line wandb-generated run config. It records the full openai/whisper-small model configuration together with the Seq2SeqTrainingArguments for this run: max_steps: 8000, learning_rate: 1e-05, warmup_steps: 500, per_device_train_batch_size: 32, per_device_eval_batch_size: 16, fp16: true, gradient_checkpointing: true, eval_steps/save_steps: 1000, metric_for_best_model: wer, predict_with_generate: true, generation_max_length: 225, push_to_hub: true, run_name: whisper-small-eu, logging_dir: ./runs/Feb12_12-17-27_tknika. The same values appear in the config_cb entry of this run's debug.log below.]
wandb/run-20250212_121751-d4i88lzt/files/output.log
ADDED
@@ -0,0 +1,22 @@
  0%|          | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
    main()
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
    train_result = trainer.train(resume_from_checkpoint=checkpoint)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
    return self.call_event("on_epoch_begin", args, state, control)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
    result = getattr(callback, event)(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
    if isinstance(train_dataloader.dataset, IterableDatasetShard):
       ^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'dataset'
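This traceback is the failure the ShuffleCallback guard in run_speech_recognition_seq2seq_streaming_cv.py works around: on_epoch_begin can fire before the Trainer has built its train dataloader, so train_dataloader is still None. A minimal sketch of that defensive pattern, using stand-in objects rather than a real Trainer:

from types import SimpleNamespace

def safe_epoch_begin(callback):
    # Mirror of the guard in the CV script: skip reshuffling when the trainer
    # or its dataloader has not been created yet, instead of dereferencing None.
    trainer = getattr(callback, "trainer", None)
    if trainer is None or getattr(trainer, "train_dataloader", None) is None:
        return "skipped"
    dataset = trainer.train_dataloader.dataset
    if hasattr(dataset, "set_epoch"):
        dataset.set_epoch(getattr(dataset, "_epoch", 0) + 1)
    return "shuffled"

# Both calls reproduce the crash scenario from this log without crashing.
print(safe_epoch_begin(SimpleNamespace()))                                             # skipped
print(safe_epoch_begin(SimpleNamespace(trainer=SimpleNamespace(train_dataloader=None))))  # skipped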
wandb/run-20250212_121751-d4i88lzt/files/requirements.txt
ADDED
@@ -0,0 +1,115 @@
aiosignal==1.3.2
Markdown==3.7
more-itertools==10.6.0
requests==2.32.3
sentry-sdk==2.21.0
torchaudio==2.6.0
charset-normalizer==3.4.1
docker-pycreds==0.4.0
nvidia-cusolver-cu12==11.6.1.9
PyYAML==6.0.2
librosa==0.10.2.post1
soxr==0.5.0.post1
multiprocess==0.70.16
setuptools==75.8.0
nvidia-cufft-cu12==11.2.1.3
joblib==1.4.2
pytz==2025.1
pip==24.0
scikit-learn==1.6.1
certifi==2025.1.31
jiwer==3.1.0
regex==2024.11.6
annotated-types==0.7.0
grpcio==1.70.0
msgpack==1.1.0
mpmath==1.3.0
nvidia-cudnn-cu12==9.1.0.70
soundfile==0.13.1
dill==0.3.8
nvidia-nvtx-cu12==12.4.127
six==1.17.0
nvidia-cuda-cupti-cu12==12.4.127
pyarrow==19.0.0
nvidia-nccl-cu12==2.21.5
psutil==6.1.1
decorator==5.1.1
llvmlite==0.44.0
frozenlist==1.5.0
pydantic==2.10.6
networkx==3.4.2
idna==3.10
wandb==0.19.6
aiohttp==3.11.12
RapidFuzz==3.12.1
pandas==2.2.3
python-dateutil==2.9.0.post0
numpy==2.1.3
tokenizers==0.21.0
nvidia-cusparselt-cu12==0.6.2
typing_extensions==4.12.2
urllib3==2.3.0
setproctitle==1.3.4
tzdata==2025.1
sympy==1.13.1
pooch==1.8.2
click==8.1.8
pydantic_core==2.27.2
MarkupSafe==3.0.2
scipy==1.15.1
accelerate==1.3.0
tensorboard==2.19.0
protobuf==5.29.3
gitdb==4.0.12
smmap==5.0.2
absl-py==2.1.0
tqdm==4.67.1
yarl==1.18.3
pycparser==2.22
nvidia-cusparse-cu12==12.3.1.170
attrs==25.1.0
lazy_loader==0.4
tensorboard-data-server==0.7.2
threadpoolctl==3.5.0
GitPython==3.1.44
safetensors==0.5.2
fsspec==2024.12.0
nvidia-cuda-nvrtc-cu12==12.4.127
filelock==3.17.0
aiohappyeyeballs==2.4.6
packaging==24.2
datasets==3.2.1.dev0
audioread==3.0.1
propcache==0.2.1
transformers==4.49.0.dev0
nvidia-cuda-runtime-cu12==12.4.127
cffi==1.17.1
evaluate==0.4.3
Werkzeug==3.1.3
huggingface-hub==0.28.1
Jinja2==3.1.5
torch==2.6.0
nvidia-curand-cu12==10.3.5.147
xxhash==3.5.0
platformdirs==4.3.6
multidict==6.1.0
nvidia-cublas-cu12==12.4.5.8
nvidia-nvjitlink-cu12==12.4.127
triton==3.2.0
numba==0.61.0
importlib_metadata==8.0.0
platformdirs==4.2.2
typeguard==4.3.0
more-itertools==10.3.0
tomli==2.0.1
autocommand==2.2.2
zipp==3.19.2
typing_extensions==4.12.2
backports.tarfile==1.2.0
inflect==7.3.1
jaraco.text==3.12.1
wheel==0.43.0
packaging==24.2
jaraco.collections==5.1.0
jaraco.functools==4.0.1
jaraco.context==5.3.0
wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json
ADDED
@@ -0,0 +1,85 @@
{
  "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
  "python": "CPython 3.12.3",
  "startedAt": "2025-02-12T12:17:51.527114Z",
  "args": [
    "--model_name_or_path=openai/whisper-small",
    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
    "--language=basque",
    "--train_split_name=train",
    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
    "--model_index_name=Whisper Small Basque",
    "--max_steps=8000",
    "--output_dir=./",
    "--per_device_train_batch_size=32",
    "--per_device_eval_batch_size=16",
    "--gradient_accumulation_steps=1",
    "--logging_steps=25",
    "--learning_rate=1e-5",
    "--warmup_steps=500",
    "--evaluation_strategy=steps",
    "--eval_steps=1000",
    "--save_strategy=steps",
    "--save_steps=1000",
    "--generation_max_length=225",
    "--length_column_name=input_length",
    "--max_duration_in_seconds=30",
    "--text_column_name=sentence",
    "--freeze_feature_encoder=False",
    "--report_to=tensorboard",
    "--metric_for_best_model=wer",
    "--greater_is_better=False",
    "--load_best_model_at_end",
    "--gradient_checkpointing",
    "--fp16",
    "--overwrite_output_dir",
    "--do_train",
    "--do_eval",
    "--predict_with_generate",
    "--do_normalize_eval",
    "--streaming",
    "--use_auth_token",
    "--push_to_hub",
    "--report_to",
    "wandb",
    "--run_name",
    "whisper-small-eu"
  ],
  "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
  "codePath": "run_speech_recognition_seq2seq_streaming.py",
  "git": {
    "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
    "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
  },
  "email": "[email protected]",
  "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
  "host": "tknika",
  "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
  "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
  "cpu_count": 8,
  "cpu_count_logical": 8,
  "gpu": "NVIDIA L40-48Q",
  "gpu_count": 1,
  "disk": {
    "/": {
      "total": "525987168256",
      "used": "297346564096"
    }
  },
  "memory": {
    "total": "33654022144"
  },
  "cpu": {
    "count": 8,
    "countLogical": 8
  },
  "gpu_nvidia": [
    {
      "name": "NVIDIA L40-48Q",
      "memoryTotal": "51539607552",
      "cudaCores": 18176,
      "architecture": "Ada"
    }
  ],
  "cudaVersion": "12.4"
}
wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
{"_wandb":{"runtime":0}}
wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log
ADDED
@@ -0,0 +1,14 @@
{"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
{"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392}
{"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}}
{"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"}
{"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"}
{"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"}
wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
ADDED
@@ -0,0 +1,15 @@
{"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"}
{"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"}
{"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"}
{"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"}
{"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"}
{"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"}
{"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"}
{"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"}
{"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"}
{"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"}
{"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
{"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"}
{"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"}
{"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"}
{"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"}
wandb/run-20250212_121751-d4i88lzt/logs/debug.log
ADDED
@@ -0,0 +1,26 @@
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:init():756] calling init triggers
2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
config: {'_wandb': {}}
2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():789] starting backend
2025-02-12 12:17:51,521 INFO MainThread:223392 [wandb_init.py:init():793] sending inform_init request
2025-02-12 12:17:51,526 INFO MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
2025-02-12 12:17:51,526 INFO MainThread:223392 [wandb_init.py:init():808] backend started and connected
2025-02-12 12:17:51,528 INFO MainThread:223392 [wandb_init.py:init():901] updated telemetry
2025-02-12 12:17:51,535 INFO MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
2025-02-12 12:17:51,944 INFO MainThread:223392 [wandb_init.py:init():994] starting run threads in backend
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams.
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed.
2025-02-12 12:17:52,051 INFO MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process
2025-02-12 12:17:52,052 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 
'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
2025-02-12 12:17:52,054 INFO MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75ef87e92c00>>
2025-02-12 12:17:52,055 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb
ADDED
Binary file (11.3 kB)
wandb/run-20250212_122637-v3d3ouvn/files/config.yaml
ADDED
@@ -0,0 +1,512 @@
[config.yaml — 512-line wandb-generated run config for run v3d3ouvn. The keys shown mirror the config.yaml of run d4i88lzt above (same model configuration and Seq2SeqTrainingArguments), with logging_dir: ./runs/Feb12_12-26-11_tknika.]
+
median_filter_width:
|
329 |
+
value: 7
|
330 |
+
metric_for_best_model:
|
331 |
+
value: wer
|
332 |
+
min_length:
|
333 |
+
value: 0
|
334 |
+
model/num_parameters:
|
335 |
+
value: 241734912
|
336 |
+
model_type:
|
337 |
+
value: whisper
|
338 |
+
mp_parameters:
|
339 |
+
value: ""
|
340 |
+
neftune_noise_alpha:
|
341 |
+
value: null
|
342 |
+
no_cuda:
|
343 |
+
value: false
|
344 |
+
no_repeat_ngram_size:
|
345 |
+
value: 0
|
346 |
+
num_beam_groups:
|
347 |
+
value: 1
|
348 |
+
num_beams:
|
349 |
+
value: 1
|
350 |
+
num_hidden_layers:
|
351 |
+
value: 12
|
352 |
+
num_mel_bins:
|
353 |
+
value: 80
|
354 |
+
num_return_sequences:
|
355 |
+
value: 1
|
356 |
+
num_train_epochs:
|
357 |
+
value: 3
|
358 |
+
optim:
|
359 |
+
value: adamw_torch
|
360 |
+
optim_args:
|
361 |
+
value: null
|
362 |
+
optim_target_modules:
|
363 |
+
value: null
|
364 |
+
output_attentions:
|
365 |
+
value: false
|
366 |
+
output_dir:
|
367 |
+
value: ./
|
368 |
+
output_hidden_states:
|
369 |
+
value: false
|
370 |
+
output_scores:
|
371 |
+
value: false
|
372 |
+
overwrite_output_dir:
|
373 |
+
value: true
|
374 |
+
pad_token_id:
|
375 |
+
value: 50257
|
376 |
+
past_index:
|
377 |
+
value: -1
|
378 |
+
per_device_eval_batch_size:
|
379 |
+
value: 16
|
380 |
+
per_device_train_batch_size:
|
381 |
+
value: 32
|
382 |
+
per_gpu_eval_batch_size:
|
383 |
+
value: null
|
384 |
+
per_gpu_train_batch_size:
|
385 |
+
value: null
|
386 |
+
predict_with_generate:
|
387 |
+
value: true
|
388 |
+
prediction_loss_only:
|
389 |
+
value: false
|
390 |
+
prefix:
|
391 |
+
value: null
|
392 |
+
problem_type:
|
393 |
+
value: null
|
394 |
+
push_to_hub:
|
395 |
+
value: true
|
396 |
+
push_to_hub_model_id:
|
397 |
+
value: null
|
398 |
+
push_to_hub_organization:
|
399 |
+
value: null
|
400 |
+
push_to_hub_token:
|
401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
402 |
+
ray_scope:
|
403 |
+
value: last
|
404 |
+
remove_invalid_values:
|
405 |
+
value: false
|
406 |
+
remove_unused_columns:
|
407 |
+
value: true
|
408 |
+
repetition_penalty:
|
409 |
+
value: 1
|
410 |
+
report_to:
|
411 |
+
value:
|
412 |
+
- wandb
|
413 |
+
restore_callback_states_from_checkpoint:
|
414 |
+
value: false
|
415 |
+
resume_from_checkpoint:
|
416 |
+
value: null
|
417 |
+
return_dict:
|
418 |
+
value: true
|
419 |
+
return_dict_in_generate:
|
420 |
+
value: false
|
421 |
+
run_name:
|
422 |
+
value: whisper-small-eu
|
423 |
+
save_on_each_node:
|
424 |
+
value: false
|
425 |
+
save_only_model:
|
426 |
+
value: false
|
427 |
+
save_safetensors:
|
428 |
+
value: true
|
429 |
+
save_steps:
|
430 |
+
value: 1000
|
431 |
+
save_strategy:
|
432 |
+
value: steps
|
433 |
+
save_total_limit:
|
434 |
+
value: null
|
435 |
+
scale_embedding:
|
436 |
+
value: false
|
437 |
+
seed:
|
438 |
+
value: 42
|
439 |
+
sep_token_id:
|
440 |
+
value: null
|
441 |
+
skip_memory_metrics:
|
442 |
+
value: true
|
443 |
+
sortish_sampler:
|
444 |
+
value: false
|
445 |
+
split_batches:
|
446 |
+
value: null
|
447 |
+
suppress_tokens:
|
448 |
+
value: null
|
449 |
+
task_specific_params:
|
450 |
+
value: null
|
451 |
+
temperature:
|
452 |
+
value: 1
|
453 |
+
tf_legacy_loss:
|
454 |
+
value: false
|
455 |
+
tf32:
|
456 |
+
value: null
|
457 |
+
tie_encoder_decoder:
|
458 |
+
value: false
|
459 |
+
tie_word_embeddings:
|
460 |
+
value: true
|
461 |
+
tokenizer_class:
|
462 |
+
value: null
|
463 |
+
top_k:
|
464 |
+
value: 50
|
465 |
+
top_p:
|
466 |
+
value: 1
|
467 |
+
torch_compile:
|
468 |
+
value: false
|
469 |
+
torch_compile_backend:
|
470 |
+
value: null
|
471 |
+
torch_compile_mode:
|
472 |
+
value: null
|
473 |
+
torch_dtype:
|
474 |
+
value: float32
|
475 |
+
torch_empty_cache_steps:
|
476 |
+
value: null
|
477 |
+
torchdynamo:
|
478 |
+
value: null
|
479 |
+
torchscript:
|
480 |
+
value: false
|
481 |
+
tpu_metrics_debug:
|
482 |
+
value: false
|
483 |
+
tpu_num_cores:
|
484 |
+
value: null
|
485 |
+
transformers_version:
|
486 |
+
value: 4.49.0.dev0
|
487 |
+
typical_p:
|
488 |
+
value: 1
|
489 |
+
use_bfloat16:
|
490 |
+
value: false
|
491 |
+
use_cache:
|
492 |
+
value: false
|
493 |
+
use_cpu:
|
494 |
+
value: false
|
495 |
+
use_ipex:
|
496 |
+
value: false
|
497 |
+
use_legacy_prediction_loop:
|
498 |
+
value: false
|
499 |
+
use_liger_kernel:
|
500 |
+
value: false
|
501 |
+
use_mps_device:
|
502 |
+
value: false
|
503 |
+
use_weighted_layer_sum:
|
504 |
+
value: false
|
505 |
+
vocab_size:
|
506 |
+
value: 51865
|
507 |
+
warmup_ratio:
|
508 |
+
value: 0
|
509 |
+
warmup_steps:
|
510 |
+
value: 500
|
511 |
+
weight_decay:
|
512 |
+
value: 0
|
wandb/run-20250212_122637-v3d3ouvn/files/output.log
ADDED
@@ -0,0 +1,22 @@
1 |
+
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
3 |
+
main()
|
4 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
5 |
+
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
+
return inner_training_loop(
|
9 |
+
^^^^^^^^^^^^^^^^^^^^
|
10 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
+
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
+
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
+
result = getattr(callback, event)(
|
18 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
20 |
+
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
+
AttributeError: 'NoneType' object has no attribute 'dataset'
|
wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt
ADDED
@@ -0,0 +1,115 @@
1 |
+
aiosignal==1.3.2
|
2 |
+
Markdown==3.7
|
3 |
+
more-itertools==10.6.0
|
4 |
+
requests==2.32.3
|
5 |
+
sentry-sdk==2.21.0
|
6 |
+
torchaudio==2.6.0
|
7 |
+
charset-normalizer==3.4.1
|
8 |
+
docker-pycreds==0.4.0
|
9 |
+
nvidia-cusolver-cu12==11.6.1.9
|
10 |
+
PyYAML==6.0.2
|
11 |
+
librosa==0.10.2.post1
|
12 |
+
soxr==0.5.0.post1
|
13 |
+
multiprocess==0.70.16
|
14 |
+
setuptools==75.8.0
|
15 |
+
nvidia-cufft-cu12==11.2.1.3
|
16 |
+
joblib==1.4.2
|
17 |
+
pytz==2025.1
|
18 |
+
pip==24.0
|
19 |
+
scikit-learn==1.6.1
|
20 |
+
certifi==2025.1.31
|
21 |
+
jiwer==3.1.0
|
22 |
+
regex==2024.11.6
|
23 |
+
annotated-types==0.7.0
|
24 |
+
grpcio==1.70.0
|
25 |
+
msgpack==1.1.0
|
26 |
+
mpmath==1.3.0
|
27 |
+
nvidia-cudnn-cu12==9.1.0.70
|
28 |
+
soundfile==0.13.1
|
29 |
+
dill==0.3.8
|
30 |
+
nvidia-nvtx-cu12==12.4.127
|
31 |
+
six==1.17.0
|
32 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
33 |
+
pyarrow==19.0.0
|
34 |
+
nvidia-nccl-cu12==2.21.5
|
35 |
+
psutil==6.1.1
|
36 |
+
decorator==5.1.1
|
37 |
+
llvmlite==0.44.0
|
38 |
+
frozenlist==1.5.0
|
39 |
+
pydantic==2.10.6
|
40 |
+
networkx==3.4.2
|
41 |
+
idna==3.10
|
42 |
+
wandb==0.19.6
|
43 |
+
aiohttp==3.11.12
|
44 |
+
RapidFuzz==3.12.1
|
45 |
+
pandas==2.2.3
|
46 |
+
python-dateutil==2.9.0.post0
|
47 |
+
numpy==2.1.3
|
48 |
+
tokenizers==0.21.0
|
49 |
+
nvidia-cusparselt-cu12==0.6.2
|
50 |
+
typing_extensions==4.12.2
|
51 |
+
urllib3==2.3.0
|
52 |
+
setproctitle==1.3.4
|
53 |
+
tzdata==2025.1
|
54 |
+
sympy==1.13.1
|
55 |
+
pooch==1.8.2
|
56 |
+
click==8.1.8
|
57 |
+
pydantic_core==2.27.2
|
58 |
+
MarkupSafe==3.0.2
|
59 |
+
scipy==1.15.1
|
60 |
+
accelerate==1.3.0
|
61 |
+
tensorboard==2.19.0
|
62 |
+
protobuf==5.29.3
|
63 |
+
gitdb==4.0.12
|
64 |
+
smmap==5.0.2
|
65 |
+
absl-py==2.1.0
|
66 |
+
tqdm==4.67.1
|
67 |
+
yarl==1.18.3
|
68 |
+
pycparser==2.22
|
69 |
+
nvidia-cusparse-cu12==12.3.1.170
|
70 |
+
attrs==25.1.0
|
71 |
+
lazy_loader==0.4
|
72 |
+
tensorboard-data-server==0.7.2
|
73 |
+
threadpoolctl==3.5.0
|
74 |
+
GitPython==3.1.44
|
75 |
+
safetensors==0.5.2
|
76 |
+
fsspec==2024.12.0
|
77 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
78 |
+
filelock==3.17.0
|
79 |
+
aiohappyeyeballs==2.4.6
|
80 |
+
packaging==24.2
|
81 |
+
datasets==3.2.1.dev0
|
82 |
+
audioread==3.0.1
|
83 |
+
propcache==0.2.1
|
84 |
+
transformers==4.49.0.dev0
|
85 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
86 |
+
cffi==1.17.1
|
87 |
+
evaluate==0.4.3
|
88 |
+
Werkzeug==3.1.3
|
89 |
+
huggingface-hub==0.28.1
|
90 |
+
Jinja2==3.1.5
|
91 |
+
torch==2.6.0
|
92 |
+
nvidia-curand-cu12==10.3.5.147
|
93 |
+
xxhash==3.5.0
|
94 |
+
platformdirs==4.3.6
|
95 |
+
multidict==6.1.0
|
96 |
+
nvidia-cublas-cu12==12.4.5.8
|
97 |
+
nvidia-nvjitlink-cu12==12.4.127
|
98 |
+
triton==3.2.0
|
99 |
+
numba==0.61.0
|
100 |
+
importlib_metadata==8.0.0
|
101 |
+
platformdirs==4.2.2
|
102 |
+
typeguard==4.3.0
|
103 |
+
more-itertools==10.3.0
|
104 |
+
tomli==2.0.1
|
105 |
+
autocommand==2.2.2
|
106 |
+
zipp==3.19.2
|
107 |
+
typing_extensions==4.12.2
|
108 |
+
backports.tarfile==1.2.0
|
109 |
+
inflect==7.3.1
|
110 |
+
jaraco.text==3.12.1
|
111 |
+
wheel==0.43.0
|
112 |
+
packaging==24.2
|
113 |
+
jaraco.collections==5.1.0
|
114 |
+
jaraco.functools==4.0.1
|
115 |
+
jaraco.context==5.3.0
|
wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json
ADDED
@@ -0,0 +1,85 @@
1 |
+
{
|
2 |
+
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
3 |
+
"python": "CPython 3.12.3",
|
4 |
+
"startedAt": "2025-02-12T12:26:37.277902Z",
|
5 |
+
"args": [
|
6 |
+
"--model_name_or_path=openai/whisper-small",
|
7 |
+
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
8 |
+
"--language=basque",
|
9 |
+
"--train_split_name=train",
|
10 |
+
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
11 |
+
"--model_index_name=Whisper Small Basque",
|
12 |
+
"--max_steps=8000",
|
13 |
+
"--output_dir=./",
|
14 |
+
"--per_device_train_batch_size=32",
|
15 |
+
"--per_device_eval_batch_size=16",
|
16 |
+
"--gradient_accumulation_steps=1",
|
17 |
+
"--logging_steps=25",
|
18 |
+
"--learning_rate=1e-5",
|
19 |
+
"--warmup_steps=500",
|
20 |
+
"--evaluation_strategy=steps",
|
21 |
+
"--eval_steps=1000",
|
22 |
+
"--save_strategy=steps",
|
23 |
+
"--save_steps=1000",
|
24 |
+
"--generation_max_length=225",
|
25 |
+
"--length_column_name=input_length",
|
26 |
+
"--max_duration_in_seconds=30",
|
27 |
+
"--text_column_name=sentence",
|
28 |
+
"--freeze_feature_encoder=False",
|
29 |
+
"--report_to=tensorboard",
|
30 |
+
"--metric_for_best_model=wer",
|
31 |
+
"--greater_is_better=False",
|
32 |
+
"--load_best_model_at_end",
|
33 |
+
"--gradient_checkpointing",
|
34 |
+
"--fp16",
|
35 |
+
"--overwrite_output_dir",
|
36 |
+
"--do_train",
|
37 |
+
"--do_eval",
|
38 |
+
"--predict_with_generate",
|
39 |
+
"--do_normalize_eval",
|
40 |
+
"--streaming",
|
41 |
+
"--use_auth_token",
|
42 |
+
"--push_to_hub",
|
43 |
+
"--report_to",
|
44 |
+
"wandb",
|
45 |
+
"--run_name",
|
46 |
+
"whisper-small-eu"
|
47 |
+
],
|
48 |
+
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
49 |
+
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
50 |
+
"git": {
|
51 |
+
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
52 |
+
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
53 |
+
},
|
54 |
+
"email": "[email protected]",
|
55 |
+
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
56 |
+
"host": "tknika",
|
57 |
+
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
58 |
+
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
59 |
+
"cpu_count": 8,
|
60 |
+
"cpu_count_logical": 8,
|
61 |
+
"gpu": "NVIDIA L40-48Q",
|
62 |
+
"gpu_count": 1,
|
63 |
+
"disk": {
|
64 |
+
"/": {
|
65 |
+
"total": "525987168256",
|
66 |
+
"used": "297346666496"
|
67 |
+
}
|
68 |
+
},
|
69 |
+
"memory": {
|
70 |
+
"total": "33654022144"
|
71 |
+
},
|
72 |
+
"cpu": {
|
73 |
+
"count": 8,
|
74 |
+
"countLogical": 8
|
75 |
+
},
|
76 |
+
"gpu_nvidia": [
|
77 |
+
{
|
78 |
+
"name": "NVIDIA L40-48Q",
|
79 |
+
"memoryTotal": "51539607552",
|
80 |
+
"cudaCores": 18176,
|
81 |
+
"architecture": "Ada"
|
82 |
+
}
|
83 |
+
],
|
84 |
+
"cudaVersion": "12.4"
|
85 |
+
}
|
wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
1 |
+
{"_wandb":{"runtime":0}}
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log
ADDED
@@ -0,0 +1,14 @@
1 |
+
{"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
2 |
+
{"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110}
|
3 |
+
{"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}}
|
4 |
+
{"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"}
|
5 |
+
{"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
|
6 |
+
{"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
|
7 |
+
{"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"}
|
8 |
+
{"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"}
|
9 |
+
{"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"}
|
10 |
+
{"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"}
|
11 |
+
{"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"}
|
12 |
+
{"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"}
|
13 |
+
{"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"}
|
14 |
+
{"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
ADDED
@@ -0,0 +1,15 @@
1 |
+
{"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"}
|
2 |
+
{"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"}
|
3 |
+
{"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"}
|
4 |
+
{"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"}
|
5 |
+
{"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"}
|
6 |
+
{"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"}
|
7 |
+
{"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"}
|
8 |
+
{"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"}
|
9 |
+
{"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"}
|
10 |
+
{"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"}
|
11 |
+
{"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
12 |
+
{"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"}
|
13 |
+
{"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"}
|
14 |
+
{"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"}
|
15 |
+
{"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"}
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
ADDED
@@ -0,0 +1,26 @@
1 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
2 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110
|
3 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
4 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
5 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
6 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
|
7 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
|
8 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():756] calling init triggers
|
9 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
10 |
+
config: {'_wandb': {}}
|
11 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():789] starting backend
|
12 |
+
2025-02-12 12:26:37,272 INFO MainThread:224110 [wandb_init.py:init():793] sending inform_init request
|
13 |
+
2025-02-12 12:26:37,277 INFO MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
14 |
+
2025-02-12 12:26:37,277 INFO MainThread:224110 [wandb_init.py:init():808] backend started and connected
|
15 |
+
2025-02-12 12:26:37,279 INFO MainThread:224110 [wandb_init.py:init():901] updated telemetry
|
16 |
+
2025-02-12 12:26:37,285 INFO MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
17 |
+
2025-02-12 12:26:37,653 INFO MainThread:224110 [wandb_init.py:init():994] starting run threads in backend
|
18 |
+
2025-02-12 12:26:37,764 INFO MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg
|
19 |
+
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
20 |
+
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
21 |
+
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed.
|
22 |
+
2025-02-12 12:26:37,766 INFO MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process
|
23 |
+
2025-02-12 12:26:37,767 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 
'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
24 |
+
2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7a5cbc15a330>>
|
25 |
+
2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
26 |
+
2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed
|
wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb
ADDED
Binary file (11.3 kB). View file
wandb/run-20250212_122854-4m048f5s/files/config.yaml
ADDED
@@ -0,0 +1,512 @@
1 |
+
_attn_implementation_autoset:
|
2 |
+
value: true
|
3 |
+
_name_or_path:
|
4 |
+
value: openai/whisper-small
|
5 |
+
_wandb:
|
6 |
+
value:
|
7 |
+
cli_version: 0.19.6
|
8 |
+
m:
|
9 |
+
- "1": train/global_step
|
10 |
+
"6":
|
11 |
+
- 3
|
12 |
+
"7": []
|
13 |
+
python_version: 3.12.3
|
14 |
+
t:
|
15 |
+
"1":
|
16 |
+
- 1
|
17 |
+
- 5
|
18 |
+
- 11
|
19 |
+
- 49
|
20 |
+
- 51
|
21 |
+
- 53
|
22 |
+
- 55
|
23 |
+
- 71
|
24 |
+
- 100
|
25 |
+
"2":
|
26 |
+
- 1
|
27 |
+
- 5
|
28 |
+
- 11
|
29 |
+
- 49
|
30 |
+
- 51
|
31 |
+
- 53
|
32 |
+
- 55
|
33 |
+
- 71
|
34 |
+
- 100
|
35 |
+
"3":
|
36 |
+
- 7
|
37 |
+
- 13
|
38 |
+
- 19
|
39 |
+
- 23
|
40 |
+
- 55
|
41 |
+
- 66
|
42 |
+
"4": 3.12.3
|
43 |
+
"5": 0.19.6
|
44 |
+
"6": 4.49.0.dev0
|
45 |
+
"8":
|
46 |
+
- 5
|
47 |
+
"9":
|
48 |
+
"1": transformers_trainer
|
49 |
+
"12": 0.19.6
|
50 |
+
"13": linux-x86_64
|
51 |
+
accelerator_config:
|
52 |
+
value:
|
53 |
+
dispatch_batches: null
|
54 |
+
even_batches: true
|
55 |
+
gradient_accumulation_kwargs: null
|
56 |
+
non_blocking: false
|
57 |
+
split_batches: false
|
58 |
+
use_seedable_sampler: true
|
59 |
+
activation_dropout:
|
60 |
+
value: 0
|
61 |
+
activation_function:
|
62 |
+
value: gelu
|
63 |
+
adafactor:
|
64 |
+
value: false
|
65 |
+
adam_beta1:
|
66 |
+
value: 0.9
|
67 |
+
adam_beta2:
|
68 |
+
value: 0.999
|
69 |
+
adam_epsilon:
|
70 |
+
value: 1e-08
|
71 |
+
add_cross_attention:
|
72 |
+
value: false
|
73 |
+
apply_spec_augment:
|
74 |
+
value: false
|
75 |
+
architectures:
|
76 |
+
value:
|
77 |
+
- WhisperForConditionalGeneration
|
78 |
+
attention_dropout:
|
79 |
+
value: 0
|
80 |
+
auto_find_batch_size:
|
81 |
+
value: false
|
82 |
+
average_tokens_across_devices:
|
83 |
+
value: false
|
84 |
+
bad_words_ids:
|
85 |
+
value: null
|
86 |
+
batch_eval_metrics:
|
87 |
+
value: false
|
88 |
+
begin_suppress_tokens:
|
89 |
+
value:
|
90 |
+
- 220
|
91 |
+
- 50257
|
92 |
+
bf16:
|
93 |
+
value: false
|
94 |
+
bf16_full_eval:
|
95 |
+
value: false
|
96 |
+
bos_token_id:
|
97 |
+
value: 50257
|
98 |
+
chunk_size_feed_forward:
|
99 |
+
value: 0
|
100 |
+
classifier_proj_size:
|
101 |
+
value: 256
|
102 |
+
cross_attention_hidden_size:
|
103 |
+
value: null
|
104 |
+
d_model:
|
105 |
+
value: 768
|
106 |
+
data_seed:
|
107 |
+
value: null
|
108 |
+
dataloader_drop_last:
|
109 |
+
value: false
|
110 |
+
dataloader_num_workers:
|
111 |
+
value: 0
|
112 |
+
dataloader_persistent_workers:
|
113 |
+
value: false
|
114 |
+
dataloader_pin_memory:
|
115 |
+
value: true
|
116 |
+
dataloader_prefetch_factor:
|
117 |
+
value: null
|
118 |
+
ddp_backend:
|
119 |
+
value: null
|
120 |
+
ddp_broadcast_buffers:
|
121 |
+
value: null
|
122 |
+
ddp_bucket_cap_mb:
|
123 |
+
value: null
|
124 |
+
ddp_find_unused_parameters:
|
125 |
+
value: null
|
126 |
+
ddp_timeout:
|
127 |
+
value: 1800
|
128 |
+
debug:
|
129 |
+
value: []
|
130 |
+
decoder_attention_heads:
|
131 |
+
value: 12
|
132 |
+
decoder_ffn_dim:
|
133 |
+
value: 3072
|
134 |
+
decoder_layerdrop:
|
135 |
+
value: 0
|
136 |
+
decoder_layers:
|
137 |
+
value: 12
|
138 |
+
decoder_start_token_id:
|
139 |
+
value: 50258
|
140 |
+
deepspeed:
|
141 |
+
value: null
|
142 |
+
disable_tqdm:
|
143 |
+
value: false
|
144 |
+
dispatch_batches:
|
145 |
+
value: null
|
146 |
+
diversity_penalty:
|
147 |
+
value: 0
|
148 |
+
do_eval:
|
149 |
+
value: true
|
150 |
+
do_predict:
|
151 |
+
value: false
|
152 |
+
do_sample:
|
153 |
+
value: false
|
154 |
+
do_train:
|
155 |
+
value: true
|
156 |
+
dropout:
|
157 |
+
value: 0
|
158 |
+
early_stopping:
|
159 |
+
value: false
|
160 |
+
encoder_attention_heads:
|
161 |
+
value: 12
|
162 |
+
encoder_ffn_dim:
|
163 |
+
value: 3072
|
164 |
+
encoder_layerdrop:
|
165 |
+
value: 0
|
166 |
+
encoder_layers:
|
167 |
+
value: 12
|
168 |
+
encoder_no_repeat_ngram_size:
|
169 |
+
value: 0
|
170 |
+
eos_token_id:
|
171 |
+
value: 50257
|
172 |
+
eval_accumulation_steps:
|
173 |
+
value: null
|
174 |
+
eval_delay:
|
175 |
+
value: 0
|
176 |
+
eval_do_concat_batches:
|
177 |
+
value: true
|
178 |
+
eval_on_start:
|
179 |
+
value: false
|
180 |
+
eval_steps:
|
181 |
+
value: 1000
|
182 |
+
eval_strategy:
|
183 |
+
value: steps
|
184 |
+
eval_use_gather_object:
|
185 |
+
value: false
|
186 |
+
evaluation_strategy:
|
187 |
+
value: steps
|
188 |
+
exponential_decay_length_penalty:
|
189 |
+
value: null
|
190 |
+
finetuning_task:
|
191 |
+
value: null
|
192 |
+
forced_bos_token_id:
|
193 |
+
value: null
|
194 |
+
forced_decoder_ids:
|
195 |
+
value: null
|
196 |
+
forced_eos_token_id:
|
197 |
+
value: null
|
198 |
+
fp16:
|
199 |
+
value: true
|
200 |
+
fp16_backend:
|
201 |
+
value: auto
|
202 |
+
fp16_full_eval:
|
203 |
+
value: false
|
204 |
+
fp16_opt_level:
|
205 |
+
value: O1
|
206 |
+
fsdp:
|
207 |
+
value: []
|
208 |
+
fsdp_config:
|
209 |
+
value:
|
210 |
+
min_num_params: 0
|
211 |
+
xla: false
|
212 |
+
xla_fsdp_grad_ckpt: false
|
213 |
+
xla_fsdp_v2: false
|
214 |
+
fsdp_min_num_params:
|
215 |
+
value: 0
|
216 |
+
fsdp_transformer_layer_cls_to_wrap:
|
217 |
+
value: null
|
218 |
+
full_determinism:
|
219 |
+
value: false
|
220 |
+
generation_config:
|
221 |
+
value: null
|
222 |
+
generation_max_length:
|
223 |
+
value: 225
|
224 |
+
generation_num_beams:
|
225 |
+
value: null
|
226 |
+
gradient_accumulation_steps:
|
227 |
+
value: 1
|
228 |
+
gradient_checkpointing:
|
229 |
+
value: true
|
230 |
+
gradient_checkpointing_kwargs:
|
231 |
+
value: null
|
232 |
+
greater_is_better:
|
233 |
+
value: false
|
234 |
+
group_by_length:
|
235 |
+
value: false
|
236 |
+
half_precision_backend:
|
237 |
+
value: auto
|
238 |
+
hub_always_push:
|
239 |
+
value: false
|
240 |
+
hub_model_id:
|
241 |
+
value: null
|
242 |
+
hub_private_repo:
|
243 |
+
value: null
|
244 |
+
hub_strategy:
|
245 |
+
value: every_save
|
246 |
+
hub_token:
|
247 |
+
value: <HUB_TOKEN>
|
248 |
+
id2label:
|
249 |
+
value:
|
250 |
+
"0": LABEL_0
|
251 |
+
"1": LABEL_1
|
252 |
+
ignore_data_skip:
|
253 |
+
value: false
|
254 |
+
include_for_metrics:
|
255 |
+
value: []
|
256 |
+
include_inputs_for_metrics:
|
257 |
+
value: false
|
258 |
+
include_num_input_tokens_seen:
|
259 |
+
value: false
|
260 |
+
include_tokens_per_second:
|
261 |
+
value: false
|
262 |
+
init_std:
|
263 |
+
value: 0.02
|
264 |
+
is_decoder:
|
265 |
+
value: false
|
266 |
+
is_encoder_decoder:
|
267 |
+
value: true
|
268 |
+
jit_mode_eval:
|
269 |
+
value: false
|
270 |
+
label_names:
|
271 |
+
value: null
|
272 |
+
label_smoothing_factor:
|
273 |
+
value: 0
|
274 |
+
label2id:
|
275 |
+
value:
|
276 |
+
LABEL_0: 0
|
277 |
+
LABEL_1: 1
|
278 |
+
learning_rate:
|
279 |
+
value: 1e-05
|
280 |
+
length_column_name:
|
281 |
+
value: input_length
|
282 |
+
length_penalty:
|
283 |
+
value: 1
|
284 |
+
load_best_model_at_end:
|
285 |
+
value: true
|
286 |
+
local_rank:
|
287 |
+
value: 0
|
288 |
+
log_level:
|
289 |
+
value: passive
|
290 |
+
log_level_replica:
|
291 |
+
value: warning
|
292 |
+
log_on_each_node:
|
293 |
+
value: true
|
294 |
+
logging_dir:
|
295 |
+
value: ./runs/Feb12_12-28-29_tknika
|
296 |
+
logging_first_step:
|
297 |
+
value: false
|
298 |
+
logging_nan_inf_filter:
|
299 |
+
value: true
|
300 |
+
logging_steps:
|
301 |
+
value: 25
|
302 |
+
logging_strategy:
|
303 |
+
value: steps
|
304 |
+
lr_scheduler_type:
|
305 |
+
value: linear
|
306 |
+
mask_feature_length:
|
307 |
+
value: 10
|
308 |
+
mask_feature_min_masks:
|
309 |
+
value: 0
|
310 |
+
mask_feature_prob:
|
311 |
+
value: 0
|
312 |
+
mask_time_length:
|
313 |
+
value: 10
|
314 |
+
mask_time_min_masks:
|
315 |
+
value: 2
|
316 |
+
mask_time_prob:
|
317 |
+
value: 0.05
|
318 |
+
max_grad_norm:
|
319 |
+
value: 1
|
320 |
+
max_length:
|
321 |
+
value: 448
|
322 |
+
max_source_positions:
|
323 |
+
value: 1500
|
324 |
+
max_steps:
|
325 |
+
value: 8000
|
326 |
+
max_target_positions:
|
327 |
+
value: 448
|
328 |
+
median_filter_width:
|
329 |
+
value: 7
|
330 |
+
metric_for_best_model:
|
331 |
+
value: wer
|
332 |
+
min_length:
|
333 |
+
value: 0
|
334 |
+
model/num_parameters:
|
335 |
+
value: 241734912
|
336 |
+
model_type:
|
337 |
+
value: whisper
|
338 |
+
mp_parameters:
|
339 |
+
value: ""
|
340 |
+
neftune_noise_alpha:
|
341 |
+
value: null
|
342 |
+
no_cuda:
|
343 |
+
value: false
|
344 |
+
no_repeat_ngram_size:
|
345 |
+
value: 0
|
346 |
+
num_beam_groups:
|
347 |
+
value: 1
|
348 |
+
num_beams:
|
349 |
+
value: 1
|
350 |
+
num_hidden_layers:
|
351 |
+
value: 12
|
352 |
+
num_mel_bins:
|
353 |
+
value: 80
|
354 |
+
num_return_sequences:
|
355 |
+
value: 1
|
356 |
+
num_train_epochs:
|
357 |
+
value: 3
|
358 |
+
optim:
|
359 |
+
value: adamw_torch
|
360 |
+
optim_args:
|
361 |
+
value: null
|
362 |
+
optim_target_modules:
|
363 |
+
value: null
|
364 |
+
output_attentions:
|
365 |
+
value: false
|
366 |
+
output_dir:
|
367 |
+
value: ./
|
368 |
+
output_hidden_states:
|
369 |
+
value: false
|
370 |
+
output_scores:
|
371 |
+
value: false
|
372 |
+
overwrite_output_dir:
|
373 |
+
value: true
|
374 |
+
pad_token_id:
|
375 |
+
value: 50257
|
376 |
+
past_index:
|
377 |
+
value: -1
|
378 |
+
per_device_eval_batch_size:
|
379 |
+
value: 16
|
380 |
+
per_device_train_batch_size:
|
381 |
+
value: 32
|
382 |
+
per_gpu_eval_batch_size:
|
383 |
+
value: null
|
384 |
+
per_gpu_train_batch_size:
|
385 |
+
value: null
|
386 |
+
predict_with_generate:
|
387 |
+
value: true
|
388 |
+
prediction_loss_only:
|
389 |
+
value: false
|
390 |
+
prefix:
|
391 |
+
value: null
|
392 |
+
problem_type:
|
393 |
+
value: null
|
394 |
+
push_to_hub:
|
395 |
+
value: true
|
396 |
+
push_to_hub_model_id:
|
397 |
+
value: null
|
398 |
+
push_to_hub_organization:
|
399 |
+
value: null
|
400 |
+
push_to_hub_token:
|
401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
402 |
+
ray_scope:
|
403 |
+
value: last
|
404 |
+
remove_invalid_values:
|
405 |
+
value: false
|
406 |
+
remove_unused_columns:
|
407 |
+
value: true
|
408 |
+
repetition_penalty:
|
409 |
+
value: 1
|
410 |
+
report_to:
|
411 |
+
value:
|
412 |
+
- wandb
|
413 |
+
restore_callback_states_from_checkpoint:
|
414 |
+
value: false
|
415 |
+
resume_from_checkpoint:
|
416 |
+
value: null
|
417 |
+
return_dict:
|
418 |
+
value: true
|
419 |
+
return_dict_in_generate:
|
420 |
+
value: false
|
421 |
+
run_name:
|
422 |
+
value: whisper-small-eu
|
423 |
+
save_on_each_node:
|
424 |
+
value: false
|
425 |
+
save_only_model:
|
426 |
+
value: false
|
427 |
+
save_safetensors:
|
428 |
+
value: true
|
429 |
+
save_steps:
|
430 |
+
value: 1000
|
431 |
+
save_strategy:
|
432 |
+
value: steps
|
433 |
+
save_total_limit:
|
434 |
+
value: null
|
435 |
+
scale_embedding:
|
436 |
+
value: false
|
437 |
+
seed:
|
438 |
+
value: 42
|
439 |
+
sep_token_id:
|
440 |
+
value: null
|
441 |
+
skip_memory_metrics:
|
442 |
+
value: true
|
443 |
+
sortish_sampler:
|
444 |
+
value: false
|
445 |
+
split_batches:
|
446 |
+
value: null
|
447 |
+
suppress_tokens:
|
448 |
+
value: null
|
449 |
+
task_specific_params:
|
450 |
+
value: null
|
451 |
+
temperature:
|
452 |
+
value: 1
|
453 |
+
tf_legacy_loss:
|
454 |
+
value: false
|
455 |
+
tf32:
|
456 |
+
value: null
|
457 |
+
tie_encoder_decoder:
|
458 |
+
value: false
|
459 |
+
tie_word_embeddings:
|
460 |
+
value: true
|
461 |
+
tokenizer_class:
|
462 |
+
value: null
|
463 |
+
top_k:
|
464 |
+
value: 50
|
465 |
+
top_p:
|
466 |
+
value: 1
|
467 |
+
torch_compile:
|
468 |
+
value: false
|
469 |
+
torch_compile_backend:
|
470 |
+
value: null
|
471 |
+
torch_compile_mode:
|
472 |
+
value: null
|
473 |
+
torch_dtype:
|
474 |
+
value: float32
|
475 |
+
torch_empty_cache_steps:
|
476 |
+
value: null
|
477 |
+
torchdynamo:
|
478 |
+
value: null
|
479 |
+
torchscript:
|
480 |
+
value: false
|
481 |
+
tpu_metrics_debug:
|
482 |
+
value: false
|
483 |
+
tpu_num_cores:
|
484 |
+
value: null
|
485 |
+
transformers_version:
|
486 |
+
value: 4.49.0.dev0
|
487 |
+
typical_p:
|
488 |
+
value: 1
|
489 |
+
use_bfloat16:
|
490 |
+
value: false
|
491 |
+
use_cache:
|
492 |
+
value: false
|
493 |
+
use_cpu:
|
494 |
+
value: false
|
495 |
+
use_ipex:
|
496 |
+
value: false
|
497 |
+
use_legacy_prediction_loop:
|
498 |
+
value: false
|
499 |
+
use_liger_kernel:
|
500 |
+
value: false
|
501 |
+
use_mps_device:
|
502 |
+
value: false
|
503 |
+
use_weighted_layer_sum:
|
504 |
+
value: false
|
505 |
+
vocab_size:
|
506 |
+
value: 51865
|
507 |
+
warmup_ratio:
|
508 |
+
value: 0
|
509 |
+
warmup_steps:
|
510 |
+
value: 500
|
511 |
+
weight_decay:
|
512 |
+
value: 0
|
wandb/run-20250212_122854-4m048f5s/files/output.log
ADDED
@@ -0,0 +1,22 @@
1 |
+
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
3 |
+
main()
|
4 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
5 |
+
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
+
return inner_training_loop(
|
9 |
+
^^^^^^^^^^^^^^^^^^^^
|
10 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
+
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
+
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
+
result = getattr(callback, event)(
|
18 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
20 |
+
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
+
AttributeError: 'NoneType' object has no attribute 'dataset'
|
wandb/run-20250212_122854-4m048f5s/files/requirements.txt
ADDED
@@ -0,0 +1,115 @@
1 |
+
aiosignal==1.3.2
|
2 |
+
Markdown==3.7
|
3 |
+
more-itertools==10.6.0
|
4 |
+
requests==2.32.3
|
5 |
+
sentry-sdk==2.21.0
|
6 |
+
torchaudio==2.6.0
|
7 |
+
charset-normalizer==3.4.1
|
8 |
+
docker-pycreds==0.4.0
|
9 |
+
nvidia-cusolver-cu12==11.6.1.9
|
10 |
+
PyYAML==6.0.2
|
11 |
+
librosa==0.10.2.post1
|
12 |
+
soxr==0.5.0.post1
|
13 |
+
multiprocess==0.70.16
|
14 |
+
setuptools==75.8.0
|
15 |
+
nvidia-cufft-cu12==11.2.1.3
|
16 |
+
joblib==1.4.2
|
17 |
+
pytz==2025.1
|
18 |
+
pip==24.0
|
19 |
+
scikit-learn==1.6.1
|
20 |
+
certifi==2025.1.31
|
21 |
+
jiwer==3.1.0
|
22 |
+
regex==2024.11.6
|
23 |
+
annotated-types==0.7.0
|
24 |
+
grpcio==1.70.0
|
25 |
+
msgpack==1.1.0
|
26 |
+
mpmath==1.3.0
|
27 |
+
nvidia-cudnn-cu12==9.1.0.70
|
28 |
+
soundfile==0.13.1
|
29 |
+
dill==0.3.8
|
30 |
+
nvidia-nvtx-cu12==12.4.127
|
31 |
+
six==1.17.0
|
32 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
33 |
+
pyarrow==19.0.0
|
34 |
+
nvidia-nccl-cu12==2.21.5
|
35 |
+
psutil==6.1.1
|
36 |
+
decorator==5.1.1
|
37 |
+
llvmlite==0.44.0
|
38 |
+
frozenlist==1.5.0
|
39 |
+
pydantic==2.10.6
|
40 |
+
networkx==3.4.2
|
41 |
+
idna==3.10
|
42 |
+
wandb==0.19.6
|
43 |
+
aiohttp==3.11.12
|
44 |
+
RapidFuzz==3.12.1
|
45 |
+
pandas==2.2.3
|
46 |
+
python-dateutil==2.9.0.post0
|
47 |
+
numpy==2.1.3
|
48 |
+
tokenizers==0.21.0
|
49 |
+
nvidia-cusparselt-cu12==0.6.2
|
50 |
+
typing_extensions==4.12.2
|
51 |
+
urllib3==2.3.0
|
52 |
+
setproctitle==1.3.4
|
53 |
+
tzdata==2025.1
|
54 |
+
sympy==1.13.1
|
55 |
+
pooch==1.8.2
|
56 |
+
click==8.1.8
|
57 |
+
pydantic_core==2.27.2
|
58 |
+
MarkupSafe==3.0.2
|
59 |
+
scipy==1.15.1
|
60 |
+
accelerate==1.3.0
|
61 |
+
tensorboard==2.19.0
|
62 |
+
protobuf==5.29.3
|
63 |
+
gitdb==4.0.12
|
64 |
+
smmap==5.0.2
|
65 |
+
absl-py==2.1.0
|
66 |
+
tqdm==4.67.1
|
67 |
+
yarl==1.18.3
|
68 |
+
pycparser==2.22
|
69 |
+
nvidia-cusparse-cu12==12.3.1.170
|
70 |
+
attrs==25.1.0
|
71 |
+
lazy_loader==0.4
|
72 |
+
tensorboard-data-server==0.7.2
|
73 |
+
threadpoolctl==3.5.0
|
74 |
+
GitPython==3.1.44
|
75 |
+
safetensors==0.5.2
|
76 |
+
fsspec==2024.12.0
|
77 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
78 |
+
filelock==3.17.0
|
79 |
+
aiohappyeyeballs==2.4.6
|
80 |
+
packaging==24.2
|
81 |
+
datasets==3.2.1.dev0
|
82 |
+
audioread==3.0.1
|
83 |
+
propcache==0.2.1
|
84 |
+
transformers==4.49.0.dev0
|
85 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
86 |
+
cffi==1.17.1
|
87 |
+
evaluate==0.4.3
|
88 |
+
Werkzeug==3.1.3
|
89 |
+
huggingface-hub==0.28.1
|
90 |
+
Jinja2==3.1.5
|
91 |
+
torch==2.6.0
|
92 |
+
nvidia-curand-cu12==10.3.5.147
|
93 |
+
xxhash==3.5.0
|
94 |
+
platformdirs==4.3.6
|
95 |
+
multidict==6.1.0
|
96 |
+
nvidia-cublas-cu12==12.4.5.8
|
97 |
+
nvidia-nvjitlink-cu12==12.4.127
|
98 |
+
triton==3.2.0
|
99 |
+
numba==0.61.0
|
100 |
+
importlib_metadata==8.0.0
|
101 |
+
platformdirs==4.2.2
|
102 |
+
typeguard==4.3.0
|
103 |
+
more-itertools==10.3.0
|
104 |
+
tomli==2.0.1
|
105 |
+
autocommand==2.2.2
|
106 |
+
zipp==3.19.2
|
107 |
+
typing_extensions==4.12.2
|
108 |
+
backports.tarfile==1.2.0
|
109 |
+
inflect==7.3.1
|
110 |
+
jaraco.text==3.12.1
|
111 |
+
wheel==0.43.0
|
112 |
+
packaging==24.2
|
113 |
+
jaraco.collections==5.1.0
|
114 |
+
jaraco.functools==4.0.1
|
115 |
+
jaraco.context==5.3.0
|
wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json
ADDED
@@ -0,0 +1,85 @@
1 |
+
{
|
2 |
+
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
3 |
+
"python": "CPython 3.12.3",
|
4 |
+
"startedAt": "2025-02-12T12:28:54.528397Z",
|
5 |
+
"args": [
|
6 |
+
"--model_name_or_path=openai/whisper-small",
|
7 |
+
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
8 |
+
"--language=basque",
|
9 |
+
"--train_split_name=train",
|
10 |
+
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
11 |
+
"--model_index_name=Whisper Small Basque",
|
12 |
+
"--max_steps=8000",
|
13 |
+
"--output_dir=./",
|
14 |
+
"--per_device_train_batch_size=32",
|
15 |
+
"--per_device_eval_batch_size=16",
|
16 |
+
"--gradient_accumulation_steps=1",
|
17 |
+
"--logging_steps=25",
|
18 |
+
"--learning_rate=1e-5",
|
19 |
+
"--warmup_steps=500",
|
20 |
+
"--evaluation_strategy=steps",
|
21 |
+
"--eval_steps=1000",
|
22 |
+
"--save_strategy=steps",
|
23 |
+
"--save_steps=1000",
|
24 |
+
"--generation_max_length=225",
|
25 |
+
"--length_column_name=input_length",
|
26 |
+
"--max_duration_in_seconds=30",
|
27 |
+
"--text_column_name=sentence",
|
28 |
+
"--freeze_feature_encoder=False",
|
29 |
+
"--report_to=tensorboard",
|
30 |
+
"--metric_for_best_model=wer",
|
31 |
+
"--greater_is_better=False",
|
32 |
+
"--load_best_model_at_end",
|
33 |
+
"--gradient_checkpointing",
|
34 |
+
"--fp16",
|
35 |
+
"--overwrite_output_dir",
|
36 |
+
"--do_train",
|
37 |
+
"--do_eval",
|
38 |
+
"--predict_with_generate",
|
39 |
+
"--do_normalize_eval",
|
40 |
+
"--streaming",
|
41 |
+
"--use_auth_token",
|
42 |
+
"--push_to_hub",
|
43 |
+
"--report_to",
|
44 |
+
"wandb",
|
45 |
+
"--run_name",
|
46 |
+
"whisper-small-eu"
|
47 |
+
],
|
48 |
+
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
49 |
+
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
50 |
+
"git": {
|
51 |
+
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
52 |
+
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
53 |
+
},
|
54 |
+
"email": "[email protected]",
|
55 |
+
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
56 |
+
"host": "tknika",
|
57 |
+
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
58 |
+
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
59 |
+
"cpu_count": 8,
|
60 |
+
"cpu_count_logical": 8,
|
61 |
+
"gpu": "NVIDIA L40-48Q",
|
62 |
+
"gpu_count": 1,
|
63 |
+
"disk": {
|
64 |
+
"/": {
|
65 |
+
"total": "525987168256",
|
66 |
+
"used": "297346756608"
|
67 |
+
}
|
68 |
+
},
|
69 |
+
"memory": {
|
70 |
+
"total": "33654022144"
|
71 |
+
},
|
72 |
+
"cpu": {
|
73 |
+
"count": 8,
|
74 |
+
"countLogical": 8
|
75 |
+
},
|
76 |
+
"gpu_nvidia": [
|
77 |
+
{
|
78 |
+
"name": "NVIDIA L40-48Q",
|
79 |
+
"memoryTotal": "51539607552",
|
80 |
+
"cudaCores": 18176,
|
81 |
+
"architecture": "Ada"
|
82 |
+
}
|
83 |
+
],
|
84 |
+
"cudaVersion": "12.4"
|
85 |
+
}
|
wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
1 |
+
{"_wandb":{"runtime":0}}
|
wandb/run-20250212_122854-4m048f5s/logs/debug-core.log
ADDED
@@ -0,0 +1,14 @@
1 |
+
{"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
2 |
+
{"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528}
|
3 |
+
{"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}}
|
4 |
+
{"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"}
|
5 |
+
{"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"}
|
6 |
+
{"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"}
|
7 |
+
{"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"}
|
8 |
+
{"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"}
|
9 |
+
{"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"}
|
10 |
+
{"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"}
|
11 |
+
{"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"}
|
12 |
+
{"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"}
|
13 |
+
{"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"}
|
14 |
+
{"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
ADDED
@@ -0,0 +1,15 @@
{"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"}
{"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"}
{"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"}
{"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"}
{"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"}
{"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"}
{"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"}
{"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"}
{"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"}
{"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"}
{"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
{"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"}
{"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"}
{"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"}
{"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"}
wandb/run-20250212_122854-4m048f5s/logs/debug.log
ADDED
@@ -0,0 +1,26 @@
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():756] calling init triggers
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
config: {'_wandb': {}}
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():789] starting backend
2025-02-12 12:28:54,521 INFO MainThread:224528 [wandb_init.py:init():793] sending inform_init request
2025-02-12 12:28:54,527 INFO MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
2025-02-12 12:28:54,528 INFO MainThread:224528 [wandb_init.py:init():808] backend started and connected
2025-02-12 12:28:54,530 INFO MainThread:224528 [wandb_init.py:init():901] updated telemetry
2025-02-12 12:28:54,537 INFO MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
2025-02-12 12:28:54,883 INFO MainThread:224528 [wandb_init.py:init():994] starting run threads in backend
2025-02-12 12:28:54,988 INFO MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams.
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed.
2025-02-12 12:28:54,990 INFO MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process
2025-02-12 12:28:54,991 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 
'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x71c5f6c57cb0>>
2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb
ADDED
Binary file (11.3 kB). View file
wandb/run-20250212_125202-c6xjc1gs/files/config.yaml
ADDED
@@ -0,0 +1,512 @@
_attn_implementation_autoset:
  value: true
_name_or_path:
  value: openai/whisper-small
_wandb:
  value:
    cli_version: 0.19.6
    m:
      - "1": train/global_step
        "6":
          - 3
        "7": []
    python_version: 3.12.3
    t:
      "1":
        - 1
        - 5
        - 11
        - 49
        - 51
        - 53
        - 55
        - 71
        - 100
      "2":
        - 1
        - 5
        - 11
        - 49
        - 51
        - 53
        - 55
        - 71
        - 100
      "3":
        - 7
        - 13
        - 19
        - 23
        - 55
        - 66
      "4": 3.12.3
      "5": 0.19.6
      "6": 4.49.0.dev0
      "8":
        - 5
      "9":
        "1": transformers_trainer
      "12": 0.19.6
      "13": linux-x86_64
accelerator_config:
  value:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_seedable_sampler: true
activation_dropout:
  value: 0
activation_function:
  value: gelu
adafactor:
  value: false
adam_beta1:
  value: 0.9
adam_beta2:
  value: 0.999
adam_epsilon:
  value: 1e-08
add_cross_attention:
  value: false
apply_spec_augment:
  value: false
architectures:
  value:
    - WhisperForConditionalGeneration
attention_dropout:
  value: 0
auto_find_batch_size:
  value: false
average_tokens_across_devices:
  value: false
bad_words_ids:
  value: null
batch_eval_metrics:
  value: false
begin_suppress_tokens:
  value:
    - 220
    - 50257
bf16:
  value: false
bf16_full_eval:
  value: false
bos_token_id:
  value: 50257
chunk_size_feed_forward:
  value: 0
classifier_proj_size:
  value: 256
cross_attention_hidden_size:
  value: null
d_model:
  value: 768
data_seed:
  value: null
dataloader_drop_last:
  value: false
dataloader_num_workers:
  value: 0
dataloader_persistent_workers:
  value: false
dataloader_pin_memory:
  value: true
dataloader_prefetch_factor:
  value: null
ddp_backend:
  value: null
ddp_broadcast_buffers:
  value: null
ddp_bucket_cap_mb:
  value: null
ddp_find_unused_parameters:
  value: null
ddp_timeout:
  value: 1800
debug:
  value: []
decoder_attention_heads:
  value: 12
decoder_ffn_dim:
  value: 3072
decoder_layerdrop:
  value: 0
decoder_layers:
  value: 12
decoder_start_token_id:
  value: 50258
deepspeed:
  value: null
disable_tqdm:
  value: false
dispatch_batches:
  value: null
diversity_penalty:
  value: 0
do_eval:
  value: true
do_predict:
  value: false
do_sample:
  value: false
do_train:
  value: true
dropout:
  value: 0
early_stopping:
  value: false
encoder_attention_heads:
  value: 12
encoder_ffn_dim:
  value: 3072
encoder_layerdrop:
  value: 0
encoder_layers:
  value: 12
encoder_no_repeat_ngram_size:
  value: 0
eos_token_id:
  value: 50257
eval_accumulation_steps:
  value: null
eval_delay:
  value: 0
eval_do_concat_batches:
  value: true
eval_on_start:
  value: false
eval_steps:
  value: 1000
eval_strategy:
  value: steps
eval_use_gather_object:
  value: false
evaluation_strategy:
  value: steps
exponential_decay_length_penalty:
  value: null
finetuning_task:
  value: null
forced_bos_token_id:
  value: null
forced_decoder_ids:
  value: null
forced_eos_token_id:
  value: null
fp16:
  value: true
fp16_backend:
  value: auto
fp16_full_eval:
  value: false
fp16_opt_level:
  value: O1
fsdp:
  value: []
fsdp_config:
  value:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
fsdp_min_num_params:
  value: 0
fsdp_transformer_layer_cls_to_wrap:
  value: null
full_determinism:
  value: false
generation_config:
  value: null
generation_max_length:
  value: 225
generation_num_beams:
  value: null
gradient_accumulation_steps:
  value: 1
gradient_checkpointing:
  value: true
gradient_checkpointing_kwargs:
  value: null
greater_is_better:
  value: false
group_by_length:
  value: false
half_precision_backend:
  value: auto
hub_always_push:
  value: false
hub_model_id:
  value: null
hub_private_repo:
  value: null
hub_strategy:
  value: every_save
hub_token:
  value: <HUB_TOKEN>
id2label:
  value:
    "0": LABEL_0
    "1": LABEL_1
ignore_data_skip:
  value: false
include_for_metrics:
  value: []
include_inputs_for_metrics:
  value: false
include_num_input_tokens_seen:
  value: false
include_tokens_per_second:
  value: false
init_std:
  value: 0.02
is_decoder:
  value: false
is_encoder_decoder:
  value: true
jit_mode_eval:
  value: false
label_names:
  value: null
label_smoothing_factor:
  value: 0
label2id:
  value:
    LABEL_0: 0
    LABEL_1: 1
learning_rate:
  value: 1e-05
length_column_name:
  value: input_length
length_penalty:
  value: 1
load_best_model_at_end:
  value: true
local_rank:
  value: 0
log_level:
  value: passive
log_level_replica:
  value: warning
log_on_each_node:
  value: true
logging_dir:
  value: ./runs/Feb12_12-51-48_tknika
logging_first_step:
  value: false
logging_nan_inf_filter:
  value: true
logging_steps:
  value: 25
logging_strategy:
  value: steps
lr_scheduler_type:
  value: linear
mask_feature_length:
  value: 10
mask_feature_min_masks:
  value: 0
mask_feature_prob:
  value: 0
mask_time_length:
  value: 10
mask_time_min_masks:
  value: 2
mask_time_prob:
  value: 0.05
max_grad_norm:
  value: 1
max_length:
  value: 448
max_source_positions:
  value: 1500
max_steps:
  value: 8000
max_target_positions:
  value: 448
median_filter_width:
  value: 7
metric_for_best_model:
  value: wer
min_length:
  value: 0
model/num_parameters:
  value: 241734912
model_type:
  value: whisper
mp_parameters:
  value: ""
neftune_noise_alpha:
  value: null
no_cuda:
  value: false
no_repeat_ngram_size:
  value: 0
num_beam_groups:
  value: 1
num_beams:
  value: 1
num_hidden_layers:
  value: 12
num_mel_bins:
  value: 80
num_return_sequences:
  value: 1
num_train_epochs:
  value: 3
optim:
  value: adamw_torch
optim_args:
  value: null
optim_target_modules:
  value: null
output_attentions:
  value: false
output_dir:
  value: ./
output_hidden_states:
  value: false
output_scores:
  value: false
overwrite_output_dir:
  value: true
pad_token_id:
  value: 50257
past_index:
  value: -1
per_device_eval_batch_size:
  value: 16
per_device_train_batch_size:
  value: 32
per_gpu_eval_batch_size:
  value: null
per_gpu_train_batch_size:
  value: null
predict_with_generate:
  value: true
prediction_loss_only:
  value: false
prefix:
  value: null
problem_type:
  value: null
push_to_hub:
  value: true
push_to_hub_model_id:
  value: null
push_to_hub_organization:
  value: null
push_to_hub_token:
  value: <PUSH_TO_HUB_TOKEN>
ray_scope:
  value: last
remove_invalid_values:
  value: false
remove_unused_columns:
  value: true
repetition_penalty:
  value: 1
report_to:
  value:
    - wandb
restore_callback_states_from_checkpoint:
  value: false
resume_from_checkpoint:
  value: null
return_dict:
  value: true
return_dict_in_generate:
  value: false
run_name:
  value: whisper-small-eu
save_on_each_node:
  value: false
save_only_model:
  value: false
save_safetensors:
  value: true
save_steps:
  value: 1000
save_strategy:
  value: steps
save_total_limit:
  value: null
scale_embedding:
  value: false
seed:
  value: 42
sep_token_id:
  value: null
skip_memory_metrics:
  value: true
sortish_sampler:
  value: false
split_batches:
  value: null
suppress_tokens:
  value: null
task_specific_params:
  value: null
temperature:
  value: 1
tf_legacy_loss:
  value: false
tf32:
  value: null
tie_encoder_decoder:
  value: false
tie_word_embeddings:
  value: true
tokenizer_class:
  value: null
top_k:
  value: 50
top_p:
  value: 1
torch_compile:
  value: false
torch_compile_backend:
  value: null
torch_compile_mode:
  value: null
torch_dtype:
  value: float32
torch_empty_cache_steps:
  value: null
torchdynamo:
  value: null
torchscript:
  value: false
tpu_metrics_debug:
  value: false
tpu_num_cores:
  value: null
transformers_version:
  value: 4.49.0.dev0
typical_p:
  value: 1
use_bfloat16:
  value: false
use_cache:
  value: false
use_cpu:
  value: false
use_ipex:
  value: false
use_legacy_prediction_loop:
  value: false
use_liger_kernel:
  value: false
use_mps_device:
  value: false
use_weighted_layer_sum:
  value: false
vocab_size:
  value: 51865
warmup_ratio:
  value: 0
warmup_steps:
  value: 500
weight_decay:
  value: 0
wandb/run-20250212_125202-c6xjc1gs/files/output.log
ADDED
@@ -0,0 +1,22 @@
  0%|          | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
    main()
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
    train_result = trainer.train(resume_from_checkpoint=checkpoint)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
    return self.call_event("on_epoch_begin", args, state, control)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
    result = getattr(callback, event)(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
    if isinstance(train_dataloader.dataset, IterableDatasetShard):
       ^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'dataset'
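Note on the traceback above: the failure is inside a custom on_epoch_begin callback at line 555 of run_speech_recognition_seq2seq_streaming.py, which is called by the Trainer without a train_dataloader, so train_dataloader is None when the isinstance check runs. The following is a minimal defensive sketch only, assuming the callback resembles the ShuffleCallback used in the Whisper streaming fine-tuning examples; the class name and guard below are illustrative and not the repository's exact code.

from datasets import IterableDataset
from transformers import TrainerCallback
from transformers.trainer_pt_utils import IterableDatasetShard

class ShuffleCallback(TrainerCallback):
    def on_epoch_begin(self, args, state, control, train_dataloader=None, **kwargs):
        # Guard against the failure seen above: some Trainer versions invoke the
        # callback without passing a dataloader, so skip instead of raising.
        if train_dataloader is None or getattr(train_dataloader, "dataset", None) is None:
            return
        if isinstance(train_dataloader.dataset, IterableDatasetShard):
            pass  # set_epoch() is handled by the Trainer for sharded datasets
        elif isinstance(train_dataloader.dataset, IterableDataset):
            # Reshuffle the streaming dataset at each epoch boundary.
            train_dataloader.dataset.set_epoch(train_dataloader.dataset._epoch + 1)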
wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt
ADDED
@@ -0,0 +1,115 @@
aiosignal==1.3.2
Markdown==3.7
more-itertools==10.6.0
requests==2.32.3
sentry-sdk==2.21.0
torchaudio==2.6.0
charset-normalizer==3.4.1
docker-pycreds==0.4.0
nvidia-cusolver-cu12==11.6.1.9
PyYAML==6.0.2
librosa==0.10.2.post1
soxr==0.5.0.post1
multiprocess==0.70.16
setuptools==75.8.0
nvidia-cufft-cu12==11.2.1.3
joblib==1.4.2
pytz==2025.1
pip==24.0
scikit-learn==1.6.1
certifi==2025.1.31
jiwer==3.1.0
regex==2024.11.6
annotated-types==0.7.0
grpcio==1.70.0
msgpack==1.1.0
mpmath==1.3.0
nvidia-cudnn-cu12==9.1.0.70
soundfile==0.13.1
dill==0.3.8
nvidia-nvtx-cu12==12.4.127
six==1.17.0
nvidia-cuda-cupti-cu12==12.4.127
pyarrow==19.0.0
nvidia-nccl-cu12==2.21.5
psutil==6.1.1
decorator==5.1.1
llvmlite==0.44.0
frozenlist==1.5.0
pydantic==2.10.6
networkx==3.4.2
idna==3.10
wandb==0.19.6
aiohttp==3.11.12
RapidFuzz==3.12.1
pandas==2.2.3
python-dateutil==2.9.0.post0
numpy==2.1.3
tokenizers==0.21.0
nvidia-cusparselt-cu12==0.6.2
typing_extensions==4.12.2
urllib3==2.3.0
setproctitle==1.3.4
tzdata==2025.1
sympy==1.13.1
pooch==1.8.2
click==8.1.8
pydantic_core==2.27.2
MarkupSafe==3.0.2
scipy==1.15.1
accelerate==1.3.0
tensorboard==2.19.0
protobuf==5.29.3
gitdb==4.0.12
smmap==5.0.2
absl-py==2.1.0
tqdm==4.67.1
yarl==1.18.3
pycparser==2.22
nvidia-cusparse-cu12==12.3.1.170
attrs==25.1.0
lazy_loader==0.4
tensorboard-data-server==0.7.2
threadpoolctl==3.5.0
GitPython==3.1.44
safetensors==0.5.2
fsspec==2024.12.0
nvidia-cuda-nvrtc-cu12==12.4.127
filelock==3.17.0
aiohappyeyeballs==2.4.6
packaging==24.2
datasets==3.2.1.dev0
audioread==3.0.1
propcache==0.2.1
transformers==4.49.0.dev0
nvidia-cuda-runtime-cu12==12.4.127
cffi==1.17.1
evaluate==0.4.3
Werkzeug==3.1.3
huggingface-hub==0.28.1
Jinja2==3.1.5
torch==2.6.0
nvidia-curand-cu12==10.3.5.147
xxhash==3.5.0
platformdirs==4.3.6
multidict==6.1.0
nvidia-cublas-cu12==12.4.5.8
nvidia-nvjitlink-cu12==12.4.127
triton==3.2.0
numba==0.61.0
importlib_metadata==8.0.0
platformdirs==4.2.2
typeguard==4.3.0
more-itertools==10.3.0
tomli==2.0.1
autocommand==2.2.2
zipp==3.19.2
typing_extensions==4.12.2
backports.tarfile==1.2.0
inflect==7.3.1
jaraco.text==3.12.1
wheel==0.43.0
packaging==24.2
jaraco.collections==5.1.0
jaraco.functools==4.0.1
jaraco.context==5.3.0
wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json
ADDED
@@ -0,0 +1,85 @@
{
  "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
  "python": "CPython 3.12.3",
  "startedAt": "2025-02-12T12:52:03.105234Z",
  "args": [
    "--model_name_or_path=openai/whisper-small",
    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
    "--language=basque",
    "--train_split_name=train",
    "--eval_split_name=test_parl",
    "--model_index_name=Whisper Small Basque",
    "--max_steps=8000",
    "--output_dir=./",
    "--per_device_train_batch_size=32",
    "--per_device_eval_batch_size=16",
    "--gradient_accumulation_steps=1",
    "--logging_steps=25",
    "--learning_rate=1e-5",
    "--warmup_steps=500",
    "--evaluation_strategy=steps",
    "--eval_steps=1000",
    "--save_strategy=steps",
    "--save_steps=1000",
    "--generation_max_length=225",
    "--length_column_name=input_length",
    "--max_duration_in_seconds=30",
    "--text_column_name=sentence",
    "--freeze_feature_encoder=False",
    "--report_to=tensorboard",
    "--metric_for_best_model=wer",
    "--greater_is_better=False",
    "--load_best_model_at_end",
    "--gradient_checkpointing",
    "--fp16",
    "--overwrite_output_dir",
    "--do_train",
    "--do_eval",
    "--predict_with_generate",
    "--do_normalize_eval",
    "--streaming",
    "--use_auth_token",
    "--push_to_hub",
    "--report_to",
    "wandb",
    "--run_name",
    "whisper-small-eu"
  ],
  "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
  "codePath": "run_speech_recognition_seq2seq_streaming.py",
  "git": {
    "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
    "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
  },
  "email": "[email protected]",
  "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
  "host": "tknika",
  "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
  "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
  "cpu_count": 8,
  "cpu_count_logical": 8,
  "gpu": "NVIDIA L40-48Q",
  "gpu_count": 1,
  "disk": {
    "/": {
      "total": "525987168256",
      "used": "313777016832"
    }
  },
  "memory": {
    "total": "33654022144"
  },
  "cpu": {
    "count": 8,
    "countLogical": 8
  },
  "gpu_nvidia": [
    {
      "name": "NVIDIA L40-48Q",
      "memoryTotal": "51539607552",
      "cudaCores": 18176,
      "architecture": "Ada"
    }
  ],
  "cudaVersion": "12.4"
}
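The metadata above records both the entry-point script ("program") and the full argument list ("args"), so the exact launch command can be rebuilt from it. A small illustrative helper (not part of the repository; the file path is the metadata file shown above):

import json
import shlex

# Rebuild the recorded launch command from a wandb-metadata.json file.
with open("wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json") as f:
    meta = json.load(f)

command = ["python", meta["program"], *meta["args"]]
print(" ".join(shlex.quote(part) for part in command))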
wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
{"_wandb":{"runtime":0}}
wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log
ADDED
@@ -0,0 +1,14 @@
{"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
{"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112}
{"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}}
{"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"}
{"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"}
{"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"}
wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
ADDED
@@ -0,0 +1,15 @@
{"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"}
{"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"}
{"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"}
{"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"}
{"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
{"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"}
{"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"}
wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
ADDED
@@ -0,0 +1,26 @@
2025-02-12 12:52:02,886 INFO MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():756] calling init triggers
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
config: {'_wandb': {}}
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():789] starting backend
2025-02-12 12:52:03,097 INFO MainThread:226112 [wandb_init.py:init():793] sending inform_init request
2025-02-12 12:52:03,104 INFO MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
2025-02-12 12:52:03,104 INFO MainThread:226112 [wandb_init.py:init():808] backend started and connected
2025-02-12 12:52:03,107 INFO MainThread:226112 [wandb_init.py:init():901] updated telemetry
2025-02-12 12:52:03,114 INFO MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
2025-02-12 12:52:03,483 INFO MainThread:226112 [wandb_init.py:init():994] starting run threads in backend
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams.
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed.
2025-02-12 12:52:03,568 INFO MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process
2025-02-12 12:52:03,569 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 
'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7d4830f2ddf0>>
2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb
ADDED
Binary file (11.3 kB). View file
wandb/run-20250212_125924-xhsgsxqq/files/config.yaml
ADDED
@@ -0,0 +1,512 @@
_attn_implementation_autoset:
  value: true
_name_or_path:
  value: openai/whisper-small
_wandb:
  value:
    cli_version: 0.19.6
    m:
      - "1": train/global_step
        "6":
          - 3
        "7": []
    python_version: 3.12.3
    t:
      "1":
        - 1
        - 5
        - 11
        - 49
        - 51
        - 53
        - 55
        - 71
        - 100
      "2":
        - 1
        - 5
        - 11
        - 49
        - 51
        - 53
        - 55
        - 71
        - 100
      "3":
        - 7
        - 13
        - 19
        - 23
        - 55
        - 66
      "4": 3.12.3
      "5": 0.19.6
      "6": 4.49.0.dev0
      "8":
        - 5
      "9":
        "1": transformers_trainer
      "12": 0.19.6
      "13": linux-x86_64
accelerator_config:
  value:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_seedable_sampler: true
activation_dropout:
  value: 0
activation_function:
  value: gelu
adafactor:
  value: false
adam_beta1:
  value: 0.9
adam_beta2:
  value: 0.999
adam_epsilon:
  value: 1e-08
add_cross_attention:
  value: false
apply_spec_augment:
  value: false
architectures:
  value:
    - WhisperForConditionalGeneration
attention_dropout:
  value: 0
auto_find_batch_size:
  value: false
average_tokens_across_devices:
  value: false
bad_words_ids:
  value: null
batch_eval_metrics:
  value: false
begin_suppress_tokens:
  value:
    - 220
    - 50257
bf16:
  value: false
bf16_full_eval:
  value: false
bos_token_id:
  value: 50257
chunk_size_feed_forward:
  value: 0
classifier_proj_size:
  value: 256
cross_attention_hidden_size:
  value: null
d_model:
  value: 768
data_seed:
  value: null
dataloader_drop_last:
  value: false
dataloader_num_workers:
  value: 0
dataloader_persistent_workers:
  value: false
dataloader_pin_memory:
  value: true
dataloader_prefetch_factor:
  value: null
ddp_backend:
  value: null
ddp_broadcast_buffers:
  value: null
ddp_bucket_cap_mb:
  value: null
ddp_find_unused_parameters:
  value: null
ddp_timeout:
  value: 1800
debug:
  value: []
decoder_attention_heads:
  value: 12
decoder_ffn_dim:
  value: 3072
decoder_layerdrop:
  value: 0
decoder_layers:
  value: 12
decoder_start_token_id:
  value: 50258
deepspeed:
  value: null
disable_tqdm:
  value: false
dispatch_batches:
  value: null
diversity_penalty:
  value: 0
do_eval:
  value: true
do_predict:
  value: false
do_sample:
  value: false
do_train:
  value: true
dropout:
  value: 0
early_stopping:
  value: false
encoder_attention_heads:
  value: 12
encoder_ffn_dim:
  value: 3072
encoder_layerdrop:
  value: 0
encoder_layers:
  value: 12
encoder_no_repeat_ngram_size:
  value: 0
eos_token_id:
  value: 50257
eval_accumulation_steps:
  value: null
eval_delay:
  value: 0
eval_do_concat_batches:
  value: true
eval_on_start:
  value: false
eval_steps:
  value: 1000
eval_strategy:
  value: steps
eval_use_gather_object:
  value: false
evaluation_strategy:
  value: steps
exponential_decay_length_penalty:
  value: null
finetuning_task:
  value: null
forced_bos_token_id:
  value: null
forced_decoder_ids:
  value: null
forced_eos_token_id:
  value: null
fp16:
  value: true
fp16_backend:
  value: auto
fp16_full_eval:
  value: false
fp16_opt_level:
  value: O1
fsdp:
  value: []
fsdp_config:
  value:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
fsdp_min_num_params:
  value: 0
fsdp_transformer_layer_cls_to_wrap:
  value: null
full_determinism:
  value: false
generation_config:
  value: null
generation_max_length:
  value: 225
generation_num_beams:
  value: null
gradient_accumulation_steps:
  value: 1
gradient_checkpointing:
  value: true
gradient_checkpointing_kwargs:
  value: null
greater_is_better:
  value: false
group_by_length:
  value: false
half_precision_backend:
  value: auto
hub_always_push:
  value: false
hub_model_id:
  value: null
hub_private_repo:
  value: null
hub_strategy:
  value: every_save
hub_token:
  value: <HUB_TOKEN>
id2label:
  value:
    "0": LABEL_0
    "1": LABEL_1
ignore_data_skip:
  value: false
include_for_metrics:
  value: []
include_inputs_for_metrics:
  value: false
include_num_input_tokens_seen:
  value: false
include_tokens_per_second:
  value: false
init_std:
  value: 0.02
is_decoder:
  value: false
is_encoder_decoder:
  value: true
jit_mode_eval:
  value: false
label_names:
  value: null
label_smoothing_factor:
  value: 0
label2id:
  value:
    LABEL_0: 0
    LABEL_1: 1
learning_rate:
  value: 1e-05
length_column_name:
  value: input_length
length_penalty:
  value: 1
load_best_model_at_end:
  value: true
local_rank:
  value: 0
log_level:
  value: passive
log_level_replica:
  value: warning
log_on_each_node:
  value: true
logging_dir:
  value: ./runs/Feb12_12-58-59_tknika
logging_first_step:
  value: false
logging_nan_inf_filter:
  value: true
logging_steps:
  value: 25
logging_strategy:
  value: steps
lr_scheduler_type:
  value: linear
mask_feature_length:
  value: 10
mask_feature_min_masks:
  value: 0
mask_feature_prob:
  value: 0
mask_time_length:
  value: 10
mask_time_min_masks:
  value: 2
mask_time_prob:
  value: 0.05
max_grad_norm:
  value: 1
max_length:
  value: 448
max_source_positions:
  value: 1500
max_steps:
  value: 8000
max_target_positions:
  value: 448
median_filter_width:
  value: 7
metric_for_best_model:
  value: wer
min_length:
  value: 0
model/num_parameters:
  value: 241734912
model_type:
  value: whisper
mp_parameters:
  value: ""
neftune_noise_alpha:
  value: null
no_cuda:
  value: false
no_repeat_ngram_size:
  value: 0
num_beam_groups:
  value: 1
+
num_beams:
|
349 |
+
value: 1
|
350 |
+
num_hidden_layers:
|
351 |
+
value: 12
|
352 |
+
num_mel_bins:
|
353 |
+
value: 80
|
354 |
+
num_return_sequences:
|
355 |
+
value: 1
|
356 |
+
num_train_epochs:
|
357 |
+
value: 3
|
358 |
+
optim:
|
359 |
+
value: adamw_torch
|
360 |
+
optim_args:
|
361 |
+
value: null
|
362 |
+
optim_target_modules:
|
363 |
+
value: null
|
364 |
+
output_attentions:
|
365 |
+
value: false
|
366 |
+
output_dir:
|
367 |
+
value: ./
|
368 |
+
output_hidden_states:
|
369 |
+
value: false
|
370 |
+
output_scores:
|
371 |
+
value: false
|
372 |
+
overwrite_output_dir:
|
373 |
+
value: true
|
374 |
+
pad_token_id:
|
375 |
+
value: 50257
|
376 |
+
past_index:
|
377 |
+
value: -1
|
378 |
+
per_device_eval_batch_size:
|
379 |
+
value: 16
|
380 |
+
per_device_train_batch_size:
|
381 |
+
value: 32
|
382 |
+
per_gpu_eval_batch_size:
|
383 |
+
value: null
|
384 |
+
per_gpu_train_batch_size:
|
385 |
+
value: null
|
386 |
+
predict_with_generate:
|
387 |
+
value: true
|
388 |
+
prediction_loss_only:
|
389 |
+
value: false
|
390 |
+
prefix:
|
391 |
+
value: null
|
392 |
+
problem_type:
|
393 |
+
value: null
|
394 |
+
push_to_hub:
|
395 |
+
value: true
|
396 |
+
push_to_hub_model_id:
|
397 |
+
value: null
|
398 |
+
push_to_hub_organization:
|
399 |
+
value: null
|
400 |
+
push_to_hub_token:
|
401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
402 |
+
ray_scope:
|
403 |
+
value: last
|
404 |
+
remove_invalid_values:
|
405 |
+
value: false
|
406 |
+
remove_unused_columns:
|
407 |
+
value: true
|
408 |
+
repetition_penalty:
|
409 |
+
value: 1
|
410 |
+
report_to:
|
411 |
+
value:
|
412 |
+
- wandb
|
413 |
+
restore_callback_states_from_checkpoint:
|
414 |
+
value: false
|
415 |
+
resume_from_checkpoint:
|
416 |
+
value: null
|
417 |
+
return_dict:
|
418 |
+
value: true
|
419 |
+
return_dict_in_generate:
|
420 |
+
value: false
|
421 |
+
run_name:
|
422 |
+
value: whisper-small-eu
|
423 |
+
save_on_each_node:
|
424 |
+
value: false
|
425 |
+
save_only_model:
|
426 |
+
value: false
|
427 |
+
save_safetensors:
|
428 |
+
value: true
|
429 |
+
save_steps:
|
430 |
+
value: 1000
|
431 |
+
save_strategy:
|
432 |
+
value: steps
|
433 |
+
save_total_limit:
|
434 |
+
value: null
|
435 |
+
scale_embedding:
|
436 |
+
value: false
|
437 |
+
seed:
|
438 |
+
value: 42
|
439 |
+
sep_token_id:
|
440 |
+
value: null
|
441 |
+
skip_memory_metrics:
|
442 |
+
value: true
|
443 |
+
sortish_sampler:
|
444 |
+
value: false
|
445 |
+
split_batches:
|
446 |
+
value: null
|
447 |
+
suppress_tokens:
|
448 |
+
value: null
|
449 |
+
task_specific_params:
|
450 |
+
value: null
|
451 |
+
temperature:
|
452 |
+
value: 1
|
453 |
+
tf_legacy_loss:
|
454 |
+
value: false
|
455 |
+
tf32:
|
456 |
+
value: null
|
457 |
+
tie_encoder_decoder:
|
458 |
+
value: false
|
459 |
+
tie_word_embeddings:
|
460 |
+
value: true
|
461 |
+
tokenizer_class:
|
462 |
+
value: null
|
463 |
+
top_k:
|
464 |
+
value: 50
|
465 |
+
top_p:
|
466 |
+
value: 1
|
467 |
+
torch_compile:
|
468 |
+
value: false
|
469 |
+
torch_compile_backend:
|
470 |
+
value: null
|
471 |
+
torch_compile_mode:
|
472 |
+
value: null
|
473 |
+
torch_dtype:
|
474 |
+
value: float32
|
475 |
+
torch_empty_cache_steps:
|
476 |
+
value: null
|
477 |
+
torchdynamo:
|
478 |
+
value: null
|
479 |
+
torchscript:
|
480 |
+
value: false
|
481 |
+
tpu_metrics_debug:
|
482 |
+
value: false
|
483 |
+
tpu_num_cores:
|
484 |
+
value: null
|
485 |
+
transformers_version:
|
486 |
+
value: 4.49.0.dev0
|
487 |
+
typical_p:
|
488 |
+
value: 1
|
489 |
+
use_bfloat16:
|
490 |
+
value: false
|
491 |
+
use_cache:
|
492 |
+
value: false
|
493 |
+
use_cpu:
|
494 |
+
value: false
|
495 |
+
use_ipex:
|
496 |
+
value: false
|
497 |
+
use_legacy_prediction_loop:
|
498 |
+
value: false
|
499 |
+
use_liger_kernel:
|
500 |
+
value: false
|
501 |
+
use_mps_device:
|
502 |
+
value: false
|
503 |
+
use_weighted_layer_sum:
|
504 |
+
value: false
|
505 |
+
vocab_size:
|
506 |
+
value: 51865
|
507 |
+
warmup_ratio:
|
508 |
+
value: 0
|
509 |
+
warmup_steps:
|
510 |
+
value: 500
|
511 |
+
weight_decay:
|
512 |
+
value: 0
|
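
For orientation, the training-related keys logged in this config.yaml (learning_rate 1e-05, max_steps 8000, warmup_steps 500, per-device batch sizes 32/16, fp16, evaluation and checkpointing every 1000 steps, WER as the model-selection metric, reporting to wandb under run_name whisper-small-eu) correspond roughly to the Seq2SeqTrainingArguments sketched below. This is a reconstruction from the logged values for readability only, not the actual launch code; the run was started via run.sh / run_speech_recognition_seq2seq_streaming.py, and any argument not listed is assumed to keep its transformers 4.49.0.dev0 default. The architecture keys (d_model 768, 12 encoder/decoder layers, 80 mel bins, vocab_size 51865, ~241.7M parameters) are consistent with the openai/whisper-small checkpoint.

# Hypothetical sketch reconstructed from the wandb-logged values above; not the repo's launch script.
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="./",
    overwrite_output_dir=True,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=1,
    learning_rate=1e-5,
    lr_scheduler_type="linear",
    warmup_steps=500,
    max_steps=8000,
    gradient_checkpointing=True,
    fp16=True,
    eval_strategy="steps",
    eval_steps=1000,
    save_strategy="steps",
    save_steps=1000,
    logging_steps=25,
    predict_with_generate=True,
    generation_max_length=225,
    load_best_model_at_end=True,
    metric_for_best_model="wer",
    greater_is_better=False,
    report_to=["wandb"],
    run_name="whisper-small-eu",
    push_to_hub=True,
)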