xezpeleta committed on
Commit
7cfb43e
·
verified ·
1 Parent(s): 9c97586

Training in progress, step 3000

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. config.json +2 -2
  3. inspect_dataset.py +26 -0
  4. model.safetensors +1 -1
  5. requirements.txt +11 -0
  6. run.sh +9 -6
  7. run_cv.sh +39 -0
  8. run_speech_recognition_seq2seq_streaming.py +144 -55
  9. run_speech_recognition_seq2seq_streaming_cv.py +657 -0
  10. tokenizer_config.json +1 -0
  11. training_args.bin +2 -2
  12. wandb/debug-internal.log +7 -0
  13. wandb/debug.log +25 -0
  14. wandb/run-20250212_121751-d4i88lzt/files/config.yaml +512 -0
  15. wandb/run-20250212_121751-d4i88lzt/files/output.log +22 -0
  16. wandb/run-20250212_121751-d4i88lzt/files/requirements.txt +115 -0
  17. wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json +85 -0
  18. wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json +1 -0
  19. wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log +14 -0
  20. wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log +15 -0
  21. wandb/run-20250212_121751-d4i88lzt/logs/debug.log +26 -0
  22. wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb +0 -0
  23. wandb/run-20250212_122637-v3d3ouvn/files/config.yaml +512 -0
  24. wandb/run-20250212_122637-v3d3ouvn/files/output.log +22 -0
  25. wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt +115 -0
  26. wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json +85 -0
  27. wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json +1 -0
  28. wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log +14 -0
  29. wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log +15 -0
  30. wandb/run-20250212_122637-v3d3ouvn/logs/debug.log +26 -0
  31. wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb +0 -0
  32. wandb/run-20250212_122854-4m048f5s/files/config.yaml +512 -0
  33. wandb/run-20250212_122854-4m048f5s/files/output.log +22 -0
  34. wandb/run-20250212_122854-4m048f5s/files/requirements.txt +115 -0
  35. wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json +85 -0
  36. wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json +1 -0
  37. wandb/run-20250212_122854-4m048f5s/logs/debug-core.log +14 -0
  38. wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log +15 -0
  39. wandb/run-20250212_122854-4m048f5s/logs/debug.log +26 -0
  40. wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb +0 -0
  41. wandb/run-20250212_125202-c6xjc1gs/files/config.yaml +512 -0
  42. wandb/run-20250212_125202-c6xjc1gs/files/output.log +22 -0
  43. wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt +115 -0
  44. wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json +85 -0
  45. wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json +1 -0
  46. wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log +14 -0
  47. wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log +15 -0
  48. wandb/run-20250212_125202-c6xjc1gs/logs/debug.log +26 -0
  49. wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb +0 -0
  50. wandb/run-20250212_125924-xhsgsxqq/files/config.yaml +512 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -31,7 +31,7 @@
31
  "mask_time_length": 10,
32
  "mask_time_min_masks": 2,
33
  "mask_time_prob": 0.05,
34
- "max_length": 448,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
37
  "median_filter_width": 7,
@@ -41,7 +41,7 @@
41
  "pad_token_id": 50257,
42
  "scale_embedding": false,
43
  "torch_dtype": "float32",
44
- "transformers_version": "4.46.0.dev0",
45
  "use_cache": false,
46
  "use_weighted_layer_sum": false,
47
  "vocab_size": 51865
 
31
  "mask_time_length": 10,
32
  "mask_time_min_masks": 2,
33
  "mask_time_prob": 0.05,
34
+ "max_length": null,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
37
  "median_filter_width": 7,
 
41
  "pad_token_id": 50257,
42
  "scale_embedding": false,
43
  "torch_dtype": "float32",
44
+ "transformers_version": "4.49.0.dev0",
45
  "use_cache": false,
46
  "use_weighted_layer_sum": false,
47
  "vocab_size": 51865
inspect_dataset.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ import json
3
+
4
+ # Load the first few examples of the dataset
5
+ dataset = load_dataset("asierhv/composite_corpus_eu_v2.1", split="train", streaming=True)
6
+
7
+ # Collect the first three examples
8
+ examples = []
9
+ for i, example in enumerate(dataset):
10
+ if i >= 3: # Get first 3 examples
11
+ break
12
+ examples.append(example)
13
+
14
+ # Print the structure and content
15
+ for i, example in enumerate(examples):
16
+ print(f"\nExample {i+1}:")
17
+ for key, value in example.items():
18
+ if key == "audio":
19
+ print(f"audio keys: {value.keys()}")
20
+ for audio_key, audio_value in value.items():
21
+ if isinstance(audio_value, bytes) or isinstance(audio_value, memoryview):
22
+ print(f" {audio_key}: <binary data>")
23
+ else:
24
+ print(f" {audio_key}: {audio_value}")
25
+ else:
26
+ print(f"{key}: {value}")
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51160e502a9753fabd3f154ffcebb40049941544eb344c4b8fc8243205072f73
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d2fd3b1746a32b70ee58ee1a3c90a88042e6300b79bcf3fd6d5bfc260af06f0
3
  size 966995080
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch>=1.7
2
+ torchaudio
3
+ git+https://github.com/huggingface/transformers
4
+ git+https://github.com/huggingface/datasets
5
+ librosa
6
+ jiwer
7
+ evaluate>=0.3.0
8
+ more-itertools
9
+ tensorboard
10
+ accelerate>=0.26.0
11
+ wandb>=0.19.6
run.sh CHANGED
@@ -1,12 +1,12 @@
 
1
  python run_speech_recognition_seq2seq_streaming.py \
2
  --model_name_or_path="openai/whisper-small" \
3
- --dataset_name="mozilla-foundation/common_voice_17_0" \
4
- --dataset_config_name="eu" \
5
  --language="basque" \
6
- --train_split_name="train+validation" \
7
- --eval_split_name="test" \
8
  --model_index_name="Whisper Small Basque" \
9
- --max_steps="5000" \
10
  --output_dir="./" \
11
  --per_device_train_batch_size="32" \
12
  --per_device_eval_batch_size="16" \
@@ -21,6 +21,7 @@ python run_speech_recognition_seq2seq_streaming.py \
21
  --generation_max_length="225" \
22
  --length_column_name="input_length" \
23
  --max_duration_in_seconds="30" \
 
24
  --text_column_name="sentence" \
25
  --freeze_feature_encoder="False" \
26
  --report_to="tensorboard" \
@@ -36,4 +37,6 @@ python run_speech_recognition_seq2seq_streaming.py \
36
  --do_normalize_eval \
37
  --streaming \
38
  --use_auth_token \
39
- --push_to_hub
 
 
 
1
+ WANDB_PROJECT=whisper-small-eu \
2
  python run_speech_recognition_seq2seq_streaming.py \
3
  --model_name_or_path="openai/whisper-small" \
4
+ --dataset_name="asierhv/composite_corpus_eu_v2.1" \
 
5
  --language="basque" \
6
+ --train_split_name="train" \
7
+ --eval_split_name="dev_parl+test_parl+test_cv+test_oslr" \
8
  --model_index_name="Whisper Small Basque" \
9
+ --max_steps="8000" \
10
  --output_dir="./" \
11
  --per_device_train_batch_size="32" \
12
  --per_device_eval_batch_size="16" \
 
21
  --generation_max_length="225" \
22
  --length_column_name="input_length" \
23
  --max_duration_in_seconds="30" \
24
+ --audio_column_name="audio" \
25
  --text_column_name="sentence" \
26
  --freeze_feature_encoder="False" \
27
  --report_to="tensorboard" \
 
37
  --do_normalize_eval \
38
  --streaming \
39
  --use_auth_token \
40
+ --push_to_hub \
41
+ --report_to "wandb" \
42
+ --run_name "whisper-small-eu"
run_cv.sh ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python run_speech_recognition_seq2seq_streaming.py \
2
+ --model_name_or_path="openai/whisper-small" \
3
+ --dataset_name="mozilla-foundation/common_voice_17_0" \
4
+ --dataset_config_name="eu" \
5
+ --language="basque" \
6
+ --train_split_name="train+validation" \
7
+ --eval_split_name="test" \
8
+ --model_index_name="Whisper Small Basque" \
9
+ --max_steps="5000" \
10
+ --output_dir="./" \
11
+ --per_device_train_batch_size="32" \
12
+ --per_device_eval_batch_size="16" \
13
+ --gradient_accumulation_steps="1" \
14
+ --logging_steps="25" \
15
+ --learning_rate="1e-5" \
16
+ --warmup_steps="500" \
17
+ --evaluation_strategy="steps" \
18
+ --eval_steps="1000" \
19
+ --save_strategy="steps" \
20
+ --save_steps="1000" \
21
+ --generation_max_length="225" \
22
+ --length_column_name="input_length" \
23
+ --max_duration_in_seconds="30" \
24
+ --text_column_name="sentence" \
25
+ --freeze_feature_encoder="False" \
26
+ --report_to="tensorboard" \
27
+ --metric_for_best_model="wer" \
28
+ --greater_is_better="False" \
29
+ --load_best_model_at_end \
30
+ --gradient_checkpointing \
31
+ --fp16 \
32
+ --overwrite_output_dir \
33
+ --do_train \
34
+ --do_eval \
35
+ --predict_with_generate \
36
+ --do_normalize_eval \
37
+ --streaming \
38
+ --use_auth_token \
39
+ --push_to_hub
run_speech_recognition_seq2seq_streaming.py CHANGED
@@ -25,6 +25,7 @@ import os
25
  import sys
26
  from dataclasses import dataclass, field
27
  from typing import Any, Dict, List, Optional, Union
 
28
 
29
  import datasets
30
  import torch
@@ -265,25 +266,58 @@ class DataCollatorSpeechSeq2SeqWithPadding:
265
  return batch
266
 
267
 
268
- def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
269
  """
270
- Utility function to load a dataset in streaming mode. For datasets with multiple splits,
271
- each split is loaded individually and then splits combined by taking alternating examples from
272
- each (interleaving).
273
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  if "+" in split:
275
- # load multiple splits separated by the `+` symbol with streaming mode
276
- dataset_splits = [
277
- load_dataset(dataset_name, dataset_config_name, split=split_name, streaming=streaming, trust_remote_code=True, **kwargs)
278
- for split_name in split.split("+")
279
- ]
280
- # interleave multiple splits to form one dataset
281
- interleaved_dataset = interleave_datasets(dataset_splits)
282
- return interleaved_dataset
283
  else:
284
- # load a single split *with* streaming mode
285
- dataset = load_dataset(dataset_name, dataset_config_name, split=split, streaming=streaming, trust_remote_code=True, **kwargs)
286
- return dataset
287
 
288
 
289
  def main():
@@ -356,37 +390,39 @@ def main():
356
  raw_datasets["train"] = load_maybe_streaming_dataset(
357
  data_args.dataset_name,
358
  data_args.dataset_config_name,
 
359
  split=data_args.train_split_name,
360
- # xezpeleta
361
- #use_auth_token=True if model_args.use_auth_token else None,
362
  streaming=data_args.streaming,
363
  )
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  if training_args.do_eval:
366
  raw_datasets["eval"] = load_maybe_streaming_dataset(
367
  data_args.dataset_name,
368
  data_args.dataset_config_name,
 
369
  split=data_args.eval_split_name,
370
- #use_auth_token=True if model_args.use_auth_token else None,
371
  streaming=data_args.streaming,
372
  )
373
 
374
- raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())
375
-
376
- if data_args.audio_column_name not in raw_datasets_features:
377
- raise ValueError(
378
- f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
379
- "Make sure to set `--audio_column_name` to the correct audio column - one of "
380
- f"{', '.join(raw_datasets_features)}."
381
- )
382
-
383
- if data_args.text_column_name not in raw_datasets_features:
384
- raise ValueError(
385
- f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
386
- "Make sure to set `--text_column_name` to the correct text column - one of "
387
- f"{', '.join(raw_datasets_features)}."
388
- )
389
-
390
  # 5. Load pretrained model, tokenizer, and feature extractor
391
  #
392
  # Distributed training:
@@ -438,14 +474,12 @@ def main():
438
  tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
439
 
440
  # 6. Resample speech dataset if necessary
441
- dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
442
- if dataset_sampling_rate != feature_extractor.sampling_rate:
443
- raw_datasets = raw_datasets.cast_column(
444
- data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
445
- )
446
 
447
  # 7. Preprocessing the datasets.
448
- # We need to read the audio files as arrays and tokenize the targets.
449
  max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
450
  min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
451
  audio_column_name = data_args.audio_column_name
@@ -469,20 +503,59 @@ def main():
469
  else raw_datasets["eval"].select(range(data_args.max_eval_samples))
470
  )
471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  def prepare_dataset(batch):
473
- # process audio
474
- sample = batch[audio_column_name]
475
- inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
476
- # process audio length
477
- batch[model_input_name] = inputs.get(model_input_name)[0]
478
- batch["input_length"] = len(sample["array"])
479
-
480
- # process targets
481
- input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
482
- if do_remove_punctuation:
483
- input_str = normalizer(input_str).strip()
484
- batch["labels"] = tokenizer(input_str).input_ids
485
- return batch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
 
487
  with training_args.main_process_first(desc="dataset map pre-processing"):
488
  vectorized_datasets = raw_datasets.map(
@@ -490,6 +563,16 @@ def main():
490
  remove_columns=raw_datasets_features,
491
  ).with_format("torch")
492
 
 
 
 
 
 
 
 
 
 
 
493
  if training_args.do_train and data_args.streaming:
494
  # manually shuffle if streaming (done by the trainer for non-streaming)
495
  vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
@@ -551,7 +634,13 @@ def main():
551
  # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
552
  # Only required for streaming: Trainer automatically shuffles non-streaming datasets
553
  class ShuffleCallback(TrainerCallback):
554
- def on_epoch_begin(self, args, state, control, train_dataloader, **kwargs):
 
 
 
 
 
 
555
  if isinstance(train_dataloader.dataset, IterableDatasetShard):
556
  pass # set_epoch() is handled by the Trainer
557
  elif isinstance(train_dataloader.dataset, IterableDataset):
@@ -563,7 +652,7 @@ def main():
563
  args=training_args,
564
  train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
565
  eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
566
- tokenizer=feature_extractor,
567
  data_collator=data_collator,
568
  compute_metrics=compute_metrics if training_args.predict_with_generate else None,
569
  callbacks=[ShuffleCallback()] if data_args.streaming else None,
 
25
  import sys
26
  from dataclasses import dataclass, field
27
  from typing import Any, Dict, List, Optional, Union
28
+ import numpy
29
 
30
  import datasets
31
  import torch
 
266
  return batch
267
 
268
 
269
+ def load_maybe_streaming_dataset(dataset_name, dataset_config_name, data_args, split="train", streaming=True, **kwargs):
270
  """
271
+ Utility function to load a dataset in streaming mode.
 
 
272
  """
273
+ logger.info(f"Loading dataset {dataset_name} split {split} (streaming={streaming})")
274
+
275
+ def load_single_split(split_name):
276
+ logger.info(f"Loading split: {split_name}")
277
+ ds = load_dataset(
278
+ dataset_name,
279
+ dataset_config_name,
280
+ split=split_name,
281
+ streaming=streaming,
282
+ trust_remote_code=True,
283
+ **kwargs
284
+ )
285
+
286
+ # Add validation transform to ensure consistent audio format
287
+ def validate_example(example):
288
+ if not isinstance(example[data_args.audio_column_name], dict):
289
+ example[data_args.audio_column_name] = {
290
+ 'array': example[data_args.audio_column_name].array,
291
+ 'sampling_rate': example[data_args.audio_column_name].sampling_rate,
292
+ 'path': getattr(example[data_args.audio_column_name], 'path', None)
293
+ }
294
+ return example
295
+
296
+ ds = ds.map(validate_example)
297
+
298
+ # Log first example structure for debugging
299
+ try:
300
+ first_example = next(iter(ds))
301
+ logger.info(f"First example from {split_name}:")
302
+ logger.info(f" Keys: {first_example.keys()}")
303
+ if data_args.audio_column_name in first_example:
304
+ audio = first_example[data_args.audio_column_name]
305
+ logger.info(f" Audio type: {type(audio)}")
306
+ if isinstance(audio, dict):
307
+ logger.info(f" Audio keys: {audio.keys()}")
308
+ logger.info(f" Array type: {type(audio['array']) if 'array' in audio else 'missing'}")
309
+ except Exception as e:
310
+ logger.warning(f"Could not inspect first example from {split_name}: {e}")
311
+
312
+ return ds
313
+
314
  if "+" in split:
315
+ # Load and validate each split individually
316
+ dataset_splits = [load_single_split(split_name) for split_name in split.split("+")]
317
+ # Interleave datasets
318
+ return interleave_datasets(dataset_splits)
 
 
 
 
319
  else:
320
+ return load_single_split(split)
 
 
321
 
322
 
323
  def main():
 
390
  raw_datasets["train"] = load_maybe_streaming_dataset(
391
  data_args.dataset_name,
392
  data_args.dataset_config_name,
393
+ data_args,
394
  split=data_args.train_split_name,
 
 
395
  streaming=data_args.streaming,
396
  )
397
 
398
+ # Get features from train dataset since it's guaranteed to exist if do_train is True
399
+ train_dataset = raw_datasets["train"]
400
+ first_example = next(iter(train_dataset))
401
+ raw_datasets_features = list(first_example.keys())
402
+
403
+ if data_args.audio_column_name not in raw_datasets_features:
404
+ raise ValueError(
405
+ f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
406
+ "Make sure to set `--audio_column_name` to the correct audio column - one of "
407
+ f"{', '.join(raw_datasets_features)}."
408
+ )
409
+
410
+ if data_args.text_column_name not in raw_datasets_features:
411
+ raise ValueError(
412
+ f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
413
+ "Make sure to set `--text_column_name` to the correct text column - one of "
414
+ f"{', '.join(raw_datasets_features)}."
415
+ )
416
+
417
  if training_args.do_eval:
418
  raw_datasets["eval"] = load_maybe_streaming_dataset(
419
  data_args.dataset_name,
420
  data_args.dataset_config_name,
421
+ data_args,
422
  split=data_args.eval_split_name,
 
423
  streaming=data_args.streaming,
424
  )
425
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  # 5. Load pretrained model, tokenizer, and feature extractor
427
  #
428
  # Distributed training:
 
474
  tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
475
 
476
  # 6. Resample speech dataset if necessary
477
+ # For streaming datasets with audio bytes, sampling rate is handled in prepare_dataset
478
+ logger.info("Using feature extractor sampling rate: %d", feature_extractor.sampling_rate)
479
+ dataset_sampling_rate = feature_extractor.sampling_rate
 
 
480
 
481
  # 7. Preprocessing the datasets.
482
+ logger.info("Starting dataset preprocessing")
483
  max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
484
  min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
485
  audio_column_name = data_args.audio_column_name
 
503
  else raw_datasets["eval"].select(range(data_args.max_eval_samples))
504
  )
505
 
506
+ # Inspect dataset before processing
507
+ for split, dataset in raw_datasets.items():
508
+ try:
509
+ first_example = next(iter(dataset))
510
+ logger.info(f"First example from {split} before processing:")
511
+ logger.info(f"Keys: {first_example.keys()}")
512
+ if audio_column_name in first_example:
513
+ audio_data = first_example[audio_column_name]
514
+ logger.info(f"Audio column type: {type(audio_data)}")
515
+ if isinstance(audio_data, dict):
516
+ logger.info(f"Audio keys: {audio_data.keys()}")
517
+ except Exception as e:
518
+ logger.warning(f"Could not inspect first example from {split}: {e}")
519
+
520
  def prepare_dataset(batch):
521
+ try:
522
+ # Validate audio format
523
+ audio = batch[audio_column_name]
524
+
525
+ # Load audio from bytes if needed
526
+ if isinstance(audio, dict) and 'bytes' in audio:
527
+ import io
528
+ import soundfile as sf
529
+ audio_bytes = io.BytesIO(audio['bytes'])
530
+ audio_array, sampling_rate = sf.read(audio_bytes)
531
+ audio = {'array': audio_array, 'sampling_rate': sampling_rate}
532
+
533
+ # Process audio through feature extractor
534
+ inputs = feature_extractor(audio['array'], sampling_rate=audio['sampling_rate'])
535
+ batch["input_length"] = len(audio['array'])
536
+ batch[model_input_name] = inputs[model_input_name][0]
537
+
538
+ # Process text
539
+ input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
540
+ if do_remove_punctuation:
541
+ input_str = normalizer(input_str).strip()
542
+ batch["labels"] = tokenizer(input_str).input_ids
543
+
544
+ return batch
545
+
546
+ except Exception as e:
547
+ logger.error(f"Error processing batch in prepare_dataset:")
548
+ logger.error(f" Error type: {type(e).__name__}")
549
+ logger.error(f" Error message: {str(e)}")
550
+ logger.error(f" Batch keys: {list(batch.keys())}")
551
+ if audio_column_name in batch:
552
+ audio_data = batch[audio_column_name]
553
+ logger.error(f" Audio type: {type(audio_data)}")
554
+ if isinstance(audio_data, dict):
555
+ logger.error(f" Audio keys: {list(audio_data.keys())}")
556
+ elif hasattr(audio_data, '__dict__'):
557
+ logger.error(f" Audio attributes: {dir(audio_data)}")
558
+ raise
559
 
560
  with training_args.main_process_first(desc="dataset map pre-processing"):
561
  vectorized_datasets = raw_datasets.map(
 
563
  remove_columns=raw_datasets_features,
564
  ).with_format("torch")
565
 
566
+ # Inspect vectorized dataset
567
+ for split, dataset in vectorized_datasets.items():
568
+ try:
569
+ first_example = next(iter(dataset))
570
+ logger.info(f"First example from {split} after processing:")
571
+ logger.info(f"Keys: {first_example.keys()}")
572
+ logger.info(f"Types: {', '.join(f'{k}: {type(v)}' for k, v in first_example.items())}")
573
+ except Exception as e:
574
+ logger.warning(f"Could not inspect first example from vectorized {split}: {e}")
575
+
576
  if training_args.do_train and data_args.streaming:
577
  # manually shuffle if streaming (done by the trainer for non-streaming)
578
  vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
 
634
  # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
635
  # Only required for streaming: Trainer automatically shuffles non-streaming datasets
636
  class ShuffleCallback(TrainerCallback):
637
+ def on_train_begin(self, args, state, control, **kwargs):
638
+ self.trainer = kwargs.get('trainer')
639
+
640
+ def on_epoch_begin(self, args, state, control, **kwargs):
641
+ if not hasattr(self, "trainer") or not hasattr(self.trainer, "train_dataloader") or self.trainer.train_dataloader is None:
642
+ return
643
+ train_dataloader = self.trainer.train_dataloader
644
  if isinstance(train_dataloader.dataset, IterableDatasetShard):
645
  pass # set_epoch() is handled by the Trainer
646
  elif isinstance(train_dataloader.dataset, IterableDataset):
 
652
  args=training_args,
653
  train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
654
  eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
655
+ processing_class=feature_extractor,
656
  data_collator=data_collator,
657
  compute_metrics=compute_metrics if training_args.predict_with_generate else None,
658
  callbacks=[ShuffleCallback()] if data_args.streaming else None,
run_speech_recognition_seq2seq_streaming_cv.py ADDED
@@ -0,0 +1,657 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """
17
+ Fine-tuning the library models for sequence to sequence speech recognition
18
+ with 🤗 Datasets' streaming mode.
19
+ """
20
+ # You can also adapt this script for your own sequence to sequence speech
21
+ # recognition task. Pointers for this are left as comments.
22
+
23
+ import logging
24
+ import os
25
+ import sys
26
+ from dataclasses import dataclass, field
27
+ from typing import Any, Dict, List, Optional, Union
28
+ import numpy
29
+
30
+ import datasets
31
+ import torch
32
+ from datasets import DatasetDict, IterableDatasetDict, interleave_datasets, load_dataset
33
+ from torch.utils.data import IterableDataset
34
+
35
+ import evaluate
36
+ import transformers
37
+ from transformers import (
38
+ AutoConfig,
39
+ AutoFeatureExtractor,
40
+ AutoModelForSpeechSeq2Seq,
41
+ AutoProcessor,
42
+ AutoTokenizer,
43
+ HfArgumentParser,
44
+ Seq2SeqTrainer,
45
+ Seq2SeqTrainingArguments,
46
+ TrainerCallback,
47
+ set_seed,
48
+ )
49
+ from transformers.models.whisper.english_normalizer import BasicTextNormalizer
50
+ from transformers.trainer_pt_utils import IterableDatasetShard
51
+ from transformers.trainer_utils import get_last_checkpoint, is_main_process
52
+ from transformers.utils import check_min_version, send_example_telemetry
53
+ from transformers.utils.versions import require_version
54
+
55
+
56
+ # Will error if the minimal version of Transformers is not installed. Remove at your own risk.
57
+ check_min_version("4.25.0.dev0")
58
+
59
+ require_version("datasets>=1.18.2", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
60
+
61
+ logger = logging.getLogger(__name__)
62
+
63
+
64
+ @dataclass
65
+ class ModelArguments:
66
+ """
67
+ Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
68
+ """
69
+
70
+ model_name_or_path: str = field(
71
+ metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
72
+ )
73
+ config_name: Optional[str] = field(
74
+ default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
75
+ )
76
+ tokenizer_name: Optional[str] = field(
77
+ default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
78
+ )
79
+ feature_extractor_name: Optional[str] = field(
80
+ default=None, metadata={"help": "feature extractor name or path if not the same as model_name"}
81
+ )
82
+ cache_dir: Optional[str] = field(
83
+ default=None,
84
+ metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
85
+ )
86
+ use_fast_tokenizer: bool = field(
87
+ default=True,
88
+ metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
89
+ )
90
+ model_revision: str = field(
91
+ default="main",
92
+ metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
93
+ )
94
+ use_auth_token: bool = field(
95
+ default=False,
96
+ metadata={
97
+ "help": (
98
+ "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
99
+ "with private models)."
100
+ )
101
+ },
102
+ )
103
+ freeze_feature_encoder: bool = field(
104
+ default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
105
+ )
106
+ freeze_encoder: bool = field(
107
+ default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
108
+ )
109
+ forced_decoder_ids: List[List[int]] = field(
110
+ default=None,
111
+ metadata={
112
+ "help": (
113
+ "A list of pairs of integers which indicates a mapping from generation indices to token indices "
114
+ "that will be forced before sampling. For example, [[0, 123]] means the first generated token "
115
+ "will always be a token of index 123."
116
+ )
117
+ },
118
+ )
119
+ suppress_tokens: List[int] = field(
120
+ default=None, metadata={"help": "A list of tokens that will be suppressed at generation."}
121
+ )
122
+ model_index_name: str = field(default=None, metadata={"help": "Pretty name for the model card."})
123
+
124
+
125
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Every field is exposed as a command-line flag; the ``help`` metadata of each field is the
    text shown for that flag.
    """

    dataset_name: str = field(
        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    text_column: Optional[str] = field(
        default=None,
        metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of training examples to this "
                "value if set."
            )
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
                "value if set."
            )
        },
    )
    audio_column_name: str = field(
        default="audio",
        metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
    )
    text_column_name: str = field(
        default="text",
        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
    )
    # The training split is filtered (not truncated) on this bound, so the help text says so.
    max_duration_in_seconds: float = field(
        default=20.0,
        metadata={"help": "Filter out audio files that are longer than `max_duration_in_seconds` seconds"},
    )
    min_duration_in_seconds: float = field(
        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
    )
    train_split_name: str = field(
        default="train",
        metadata={
            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
        },
    )
    eval_split_name: str = field(
        default="test",
        metadata={
            "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'"
        },
    )
    do_lower_case: bool = field(
        default=False,
        metadata={"help": "Whether the target text should be lower cased."},
    )
    do_remove_punctuation: bool = field(
        default=False,
        metadata={"help": "Whether the target text should be stripped of punctuation."},
    )
    do_normalize_eval: bool = field(
        default=True,
        metadata={"help": "Whether to normalise the references and predictions in the eval WER calculation."},
    )
    language: str = field(
        default=None,
        metadata={
            "help": (
                "Language for multilingual fine-tuning. This argument should be set for multilingual fine-tuning "
                "only. For English speech recognition, it should be set to `None`."
            )
        },
    )
    task: str = field(
        default="transcribe",
        metadata={"help": "Task, either `transcribe` for speech recognition or `translate` for speech translation."},
    )
    shuffle_buffer_size: Optional[int] = field(
        default=500,
        metadata={
            "help": (
                "The number of streamed examples to download before shuffling them. The larger the buffer, "
                "the closer it is to real offline shuffling."
            )
        },
    )
    streaming: bool = field(
        default=True,
        metadata={"help": "Whether to use streaming mode to load and pre-process the data."},
    )
229
+
230
+
231
@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    """
    Collate individual examples into one padded batch of model inputs and labels.

    Args:
        processor ([`WhisperProcessor`])
            The processor whose feature extractor pads the inputs and whose tokenizer pads the labels.
        decoder_start_token_id (`int`)
            The begin-of-sentence id of the decoder; stripped from the labels when every row starts with it.
    """

    processor: Any
    decoder_start_token_id: int

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # Inputs and labels have different lengths and are padded by different
        # components, so they are collected into two separate lists first.
        input_key = self.processor.model_input_names[0]
        audio_examples = []
        label_examples = []
        for example in features:
            audio_examples.append({input_key: example[input_key]})
            label_examples.append({"input_ids": example["labels"]})

        batch = self.processor.feature_extractor.pad(audio_examples, return_tensors="pt")
        padded_labels = self.processor.tokenizer.pad(label_examples, return_tensors="pt")

        # Padded positions get -100 so that they are ignored by the loss.
        padding_mask = padded_labels.attention_mask.ne(1)
        labels = padded_labels["input_ids"].masked_fill(padding_mask, -100)

        # When tokenization already prepended the bos token to every row, drop it
        # here because it is appended again later anyway.
        starts_with_bos = (labels[:, 0] == self.decoder_start_token_id).all().cpu().item()
        if starts_with_bos:
            labels = labels[:, 1:]

        batch["labels"] = labels
        return batch
267
+
268
+
269
def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
    """
    Load one dataset split, or several splits joined with `+`, optionally in streaming mode.

    When `split` contains `+` (for example `"train+validation"`), each named split is loaded
    individually and the results are combined with `interleave_datasets`. Otherwise the single
    split is loaded and returned as is. Extra keyword arguments are forwarded to `load_dataset`.
    """

    def _load_one(split_name):
        # Every split is loaded with the same settings; only the split name varies.
        return load_dataset(dataset_name, dataset_config_name, split=split_name, streaming=streaming, trust_remote_code=True, **kwargs)

    if "+" not in split:
        return _load_one(split)

    # Several splits were requested: load each one, then interleave them into one dataset.
    return interleave_datasets([_load_one(split_name) for split_name in split.split("+")])
288
+
289
+
290
def main():
    """
    Fine-tune and/or evaluate a speech seq2seq model on an (optionally streamed) dataset.

    Arguments are read either from a single JSON file given as the only command-line argument
    or from regular command-line flags. The function then loads the requested dataset splits,
    the pretrained config / feature extractor / tokenizer / model, pre-processes the audio and
    transcripts, trains and evaluates with `Seq2SeqTrainer`, and finally pushes the model to the
    Hub or writes a model card.

    Returns:
        The `results` dict, which this function creates empty and never fills.

    Raises:
        ValueError: if the output directory is non-empty without a checkpoint, if no split was
            requested, if a configured column is missing from the dataset, or if the model has no
            `decoder_start_token_id`.
    """
    # 1. Parse input arguments
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
    # information sent is the one passed as arguments along with your Python/PyTorch versions.
    send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)

    # 2. Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)

    # Log on each process the small summary. The two f-strings are concatenated,
    # so the first one must end with its own separator.
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
    logger.info("Training/evaluation parameters %s", training_args)

    # 3. Detecting last checkpoint and eventually continue from last checkpoint
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # 4. Load dataset
    raw_datasets = IterableDatasetDict() if data_args.streaming else DatasetDict()

    if training_args.do_train:
        raw_datasets["train"] = load_maybe_streaming_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.train_split_name,
            streaming=data_args.streaming,
        )

    if training_args.do_eval:
        raw_datasets["eval"] = load_maybe_streaming_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.eval_split_name,
            streaming=data_args.streaming,
        )

    # Everything below inspects the first loaded split, so fail with a clear
    # message instead of a bare StopIteration when no split was requested.
    if len(raw_datasets) == 0:
        raise ValueError("Neither --do_train nor --do_eval was set, so no dataset split was loaded.")

    raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())

    if data_args.audio_column_name not in raw_datasets_features:
        raise ValueError(
            f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
            "Make sure to set `--audio_column_name` to the correct audio column - one of "
            f"{', '.join(raw_datasets_features)}."
        )

    if data_args.text_column_name not in raw_datasets_features:
        raise ValueError(
            f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
            "Make sure to set `--text_column_name` to the correct text column - one of "
            f"{', '.join(raw_datasets_features)}."
        )

    # 5. Load pretrained model, tokenizer, and feature extractor
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens})

    if training_args.gradient_checkpointing:
        config.update({"use_cache": False})

    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_args.model_name_or_path,
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    if model.config.decoder_start_token_id is None:
        raise ValueError("Make sure that `config.decoder_start_token_id` is correctly defined")

    if model_args.freeze_feature_encoder:
        model.freeze_feature_encoder()

    if model_args.freeze_encoder:
        model.freeze_encoder()

    if data_args.language is not None:
        # We only need to set the task id when the language is specified (i.e. in a multilingual setting)
        tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)

    # 6. Resample speech dataset if necessary
    dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
    if dataset_sampling_rate != feature_extractor.sampling_rate:
        raw_datasets = raw_datasets.cast_column(
            data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
        )

    # 7. Preprocessing the datasets.
    # We need to read the audio files as arrays and tokenize the targets.
    max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
    min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
    audio_column_name = data_args.audio_column_name
    text_column_name = data_args.text_column_name
    model_input_name = feature_extractor.model_input_names[0]
    do_lower_case = data_args.do_lower_case
    do_remove_punctuation = data_args.do_remove_punctuation
    normalizer = BasicTextNormalizer()  # 'official' text normalizer from OpenAI

    if data_args.max_train_samples is not None:
        raw_datasets["train"] = (
            raw_datasets["train"].take(data_args.max_train_samples)
            if data_args.streaming
            else raw_datasets["train"].select(range(data_args.max_train_samples))
        )

    if data_args.max_eval_samples is not None:
        raw_datasets["eval"] = (
            raw_datasets["eval"].take(data_args.max_eval_samples)
            if data_args.streaming
            else raw_datasets["eval"].select(range(data_args.max_eval_samples))
        )

    def prepare_dataset(batch):
        """Turn one raw example into model inputs, an `input_length` and tokenized `labels`."""
        # process audio
        sample = batch[audio_column_name]

        # Handle different audio formats - some datasets provide raw arrays, others provide paths
        if isinstance(sample, dict):
            if "array" in sample:
                audio_array = sample["array"]
                sampling_rate = sample["sampling_rate"]
            elif "path" in sample:
                # NOTE(review): the path string is handed to the feature extractor unchanged;
                # nothing in this function loads the file - confirm the extractor accepts a path.
                audio_array = sample["path"]
                sampling_rate = sample.get("sampling_rate", feature_extractor.sampling_rate)
            else:
                raise ValueError(f"Unsupported audio format. Sample must contain either 'array' or 'path'. Got {sample.keys()}")
        else:
            # Assume it's a direct path or array
            audio_array = sample
            sampling_rate = feature_extractor.sampling_rate

        inputs = feature_extractor(audio_array, sampling_rate=sampling_rate)

        # process audio length
        if isinstance(audio_array, numpy.ndarray):
            batch["input_length"] = len(audio_array)
        else:
            # If we couldn't get the direct array length, estimate it from the processed features
            # NOTE(review): this assumes the first axis of the extracted features counts frames - confirm.
            batch["input_length"] = inputs.get(model_input_name)[0].shape[0] * feature_extractor.hop_length

        # process targets
        input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
        if do_remove_punctuation:
            input_str = normalizer(input_str).strip()
        batch["labels"] = tokenizer(input_str).input_ids
        return batch

    with training_args.main_process_first(desc="dataset map pre-processing"):
        vectorized_datasets = raw_datasets.map(
            prepare_dataset,
            remove_columns=raw_datasets_features,
        ).with_format("torch")

        if training_args.do_train and data_args.streaming:
            # manually shuffle if streaming (done by the trainer for non-streaming)
            vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
                buffer_size=data_args.shuffle_buffer_size,
                seed=training_args.seed,
            )

    # filter training data that is shorter than min_input_length or longer than
    # max_input_length
    def is_audio_in_length_range(length):
        return min_input_length < length < max_input_length

    if training_args.do_train:
        vectorized_datasets["train"] = vectorized_datasets["train"].filter(
            is_audio_in_length_range,
            input_columns=["input_length"],
        )

    # 8. Load Metric
    metric = evaluate.load("wer")
    do_normalize_eval = data_args.do_normalize_eval

    def compute_metrics(pred):
        """Decode predictions and references and return the word error rate in percent."""
        pred_ids = pred.predictions

        # -100 marks ignored label positions; swap it for the pad id so decoding works.
        pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id

        pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
        # we do not want to group tokens when computing the metrics
        label_str = tokenizer.batch_decode(pred.label_ids, skip_special_tokens=True)

        if do_normalize_eval:
            pred_str = [normalizer(pred) for pred in pred_str]
            label_str = [normalizer(label) for label in label_str]
            # filtering step to only evaluate the samples that correspond to non-zero references:
            pred_str = [pred_str[i] for i in range(len(pred_str)) if len(label_str[i]) > 0]
            label_str = [label_str[i] for i in range(len(label_str)) if len(label_str[i]) > 0]

        wer = 100 * metric.compute(predictions=pred_str, references=label_str)

        return {"wer": wer}

    # 9. Create a single speech processor
    if is_main_process(training_args.local_rank):
        # save feature extractor, tokenizer and config
        feature_extractor.save_pretrained(training_args.output_dir)
        tokenizer.save_pretrained(training_args.output_dir)
        config.save_pretrained(training_args.output_dir)

    processor = AutoProcessor.from_pretrained(training_args.output_dir)

    # 10. Define data collator
    data_collator = DataCollatorSpeechSeq2SeqWithPadding(
        processor=processor,
        decoder_start_token_id=model.config.decoder_start_token_id,
    )

    # 11. Configure Trainer
    # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
    # Only required for streaming: Trainer automatically shuffles non-streaming datasets
    class ShuffleCallback(TrainerCallback):
        def on_epoch_begin(self, args, state, control, train_dataloader=None, **kwargs):
            # The Trainer hands the active dataloader to callbacks as the
            # `train_dataloader` keyword; it does not pass a `trainer` object.
            dataset = getattr(train_dataloader, "dataset", None)
            if isinstance(dataset, IterableDatasetShard):
                pass  # set_epoch() is handled by the Trainer
            elif isinstance(dataset, IterableDataset):
                dataset.set_epoch(dataset._epoch + 1)

    # Initialize Trainer
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
        eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
        processing_class=feature_extractor,
        data_collator=data_collator,
        compute_metrics=compute_metrics if training_args.predict_with_generate else None,
        callbacks=[ShuffleCallback()] if data_args.streaming else None,
    )

    # 12. Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()  # Saves the feature extractor too for easy upload

        metrics = train_result.metrics
        if data_args.max_train_samples:
            metrics["train_samples"] = data_args.max_train_samples
        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # 13. Evaluation
    results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate(
            metric_key_prefix="eval",
            max_length=training_args.generation_max_length,
            num_beams=training_args.generation_num_beams,
        )
        if data_args.max_eval_samples:
            metrics["eval_samples"] = data_args.max_eval_samples

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # 14. Write Training Stats
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "automatic-speech-recognition",
        "tags": "whisper-event",
    }
    if data_args.dataset_name is not None:
        kwargs["dataset_tags"] = data_args.dataset_name
        if data_args.dataset_config_name is not None:
            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
        else:
            kwargs["dataset"] = data_args.dataset_name
        # The language is derived from the config name, so skip it when no config
        # name was given rather than failing after training has already finished.
        if "common_voice" in data_args.dataset_name and data_args.dataset_config_name is not None:
            kwargs["language"] = data_args.dataset_config_name.split('-')[0]
        if model_args.model_index_name is not None:
            kwargs["model_name"] = model_args.model_index_name

    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)

    return results
654
+
655
+
656
# Run the fine-tuning entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
tokenizer_config.json CHANGED
@@ -12980,6 +12980,7 @@
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
 
12983
  "model_max_length": 1024,
12984
  "pad_token": "<|endoftext|>",
12985
  "processor_class": "WhisperProcessor",
 
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
12983
+ "extra_special_tokens": {},
12984
  "model_max_length": 1024,
12985
  "pad_token": "<|endoftext|>",
12986
  "processor_class": "WhisperProcessor",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e5015f6997af49dd4702cbba394870a18c74f3b62b5a4ffcc8bf3aa71cc41ee
3
- size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d41e6c93c9164dab27b25a4957996293e07bbed9895811c22360ffbda7ebbf
3
+ size 5432
wandb/debug-internal.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
2
+ {"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
3
+ {"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
4
+ {"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
5
+ {"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
6
+ {"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
7
+ {"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
wandb/debug.log ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
3
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
7
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
8
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():756] calling init triggers
9
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():789] starting backend
12
+ 2025-02-12 15:27:10,107 INFO MainThread:243546 [wandb_init.py:init():793] sending inform_init request
13
+ 2025-02-12 15:27:10,112 INFO MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-02-12 15:27:10,113 INFO MainThread:243546 [wandb_init.py:init():808] backend started and connected
15
+ 2025-02-12 15:27:10,115 INFO MainThread:243546 [wandb_init.py:init():901] updated telemetry
16
+ 2025-02-12 15:27:10,122 INFO MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
+ 2025-02-12 15:27:10,584 INFO MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
18
+ 2025-02-12 15:27:10,691 INFO MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
19
+ 2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
+ 2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
+ 2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
22
+ 2025-02-12 15:27:10,694 INFO MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
23
+ 2025-02-12 15:27:10,698 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
+ 2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
25
+ 2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
wandb/run-20250212_121751-d4i88lzt/files/config.yaml ADDED
@@ -0,0 +1,512 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _attn_implementation_autoset:
2
+ value: true
3
+ _name_or_path:
4
+ value: openai/whisper-small
5
+ _wandb:
6
+ value:
7
+ cli_version: 0.19.6
8
+ m:
9
+ - "1": train/global_step
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ python_version: 3.12.3
14
+ t:
15
+ "1":
16
+ - 1
17
+ - 5
18
+ - 11
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 100
25
+ "2":
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 100
35
+ "3":
36
+ - 7
37
+ - 13
38
+ - 19
39
+ - 23
40
+ - 55
41
+ - 66
42
+ "4": 3.12.3
43
+ "5": 0.19.6
44
+ "6": 4.49.0.dev0
45
+ "8":
46
+ - 5
47
+ "9":
48
+ "1": transformers_trainer
49
+ "12": 0.19.6
50
+ "13": linux-x86_64
51
+ accelerator_config:
52
+ value:
53
+ dispatch_batches: null
54
+ even_batches: true
55
+ gradient_accumulation_kwargs: null
56
+ non_blocking: false
57
+ split_batches: false
58
+ use_seedable_sampler: true
59
+ activation_dropout:
60
+ value: 0
61
+ activation_function:
62
+ value: gelu
63
+ adafactor:
64
+ value: false
65
+ adam_beta1:
66
+ value: 0.9
67
+ adam_beta2:
68
+ value: 0.999
69
+ adam_epsilon:
70
+ value: 1e-08
71
+ add_cross_attention:
72
+ value: false
73
+ apply_spec_augment:
74
+ value: false
75
+ architectures:
76
+ value:
77
+ - WhisperForConditionalGeneration
78
+ attention_dropout:
79
+ value: 0
80
+ auto_find_batch_size:
81
+ value: false
82
+ average_tokens_across_devices:
83
+ value: false
84
+ bad_words_ids:
85
+ value: null
86
+ batch_eval_metrics:
87
+ value: false
88
+ begin_suppress_tokens:
89
+ value:
90
+ - 220
91
+ - 50257
92
+ bf16:
93
+ value: false
94
+ bf16_full_eval:
95
+ value: false
96
+ bos_token_id:
97
+ value: 50257
98
+ chunk_size_feed_forward:
99
+ value: 0
100
+ classifier_proj_size:
101
+ value: 256
102
+ cross_attention_hidden_size:
103
+ value: null
104
+ d_model:
105
+ value: 768
106
+ data_seed:
107
+ value: null
108
+ dataloader_drop_last:
109
+ value: false
110
+ dataloader_num_workers:
111
+ value: 0
112
+ dataloader_persistent_workers:
113
+ value: false
114
+ dataloader_pin_memory:
115
+ value: true
116
+ dataloader_prefetch_factor:
117
+ value: null
118
+ ddp_backend:
119
+ value: null
120
+ ddp_broadcast_buffers:
121
+ value: null
122
+ ddp_bucket_cap_mb:
123
+ value: null
124
+ ddp_find_unused_parameters:
125
+ value: null
126
+ ddp_timeout:
127
+ value: 1800
128
+ debug:
129
+ value: []
130
+ decoder_attention_heads:
131
+ value: 12
132
+ decoder_ffn_dim:
133
+ value: 3072
134
+ decoder_layerdrop:
135
+ value: 0
136
+ decoder_layers:
137
+ value: 12
138
+ decoder_start_token_id:
139
+ value: 50258
140
+ deepspeed:
141
+ value: null
142
+ disable_tqdm:
143
+ value: false
144
+ dispatch_batches:
145
+ value: null
146
+ diversity_penalty:
147
+ value: 0
148
+ do_eval:
149
+ value: true
150
+ do_predict:
151
+ value: false
152
+ do_sample:
153
+ value: false
154
+ do_train:
155
+ value: true
156
+ dropout:
157
+ value: 0
158
+ early_stopping:
159
+ value: false
160
+ encoder_attention_heads:
161
+ value: 12
162
+ encoder_ffn_dim:
163
+ value: 3072
164
+ encoder_layerdrop:
165
+ value: 0
166
+ encoder_layers:
167
+ value: 12
168
+ encoder_no_repeat_ngram_size:
169
+ value: 0
170
+ eos_token_id:
171
+ value: 50257
172
+ eval_accumulation_steps:
173
+ value: null
174
+ eval_delay:
175
+ value: 0
176
+ eval_do_concat_batches:
177
+ value: true
178
+ eval_on_start:
179
+ value: false
180
+ eval_steps:
181
+ value: 1000
182
+ eval_strategy:
183
+ value: steps
184
+ eval_use_gather_object:
185
+ value: false
186
+ evaluation_strategy:
187
+ value: steps
188
+ exponential_decay_length_penalty:
189
+ value: null
190
+ finetuning_task:
191
+ value: null
192
+ forced_bos_token_id:
193
+ value: null
194
+ forced_decoder_ids:
195
+ value: null
196
+ forced_eos_token_id:
197
+ value: null
198
+ fp16:
199
+ value: true
200
+ fp16_backend:
201
+ value: auto
202
+ fp16_full_eval:
203
+ value: false
204
+ fp16_opt_level:
205
+ value: O1
206
+ fsdp:
207
+ value: []
208
+ fsdp_config:
209
+ value:
210
+ min_num_params: 0
211
+ xla: false
212
+ xla_fsdp_grad_ckpt: false
213
+ xla_fsdp_v2: false
214
+ fsdp_min_num_params:
215
+ value: 0
216
+ fsdp_transformer_layer_cls_to_wrap:
217
+ value: null
218
+ full_determinism:
219
+ value: false
220
+ generation_config:
221
+ value: null
222
+ generation_max_length:
223
+ value: 225
224
+ generation_num_beams:
225
+ value: null
226
+ gradient_accumulation_steps:
227
+ value: 1
228
+ gradient_checkpointing:
229
+ value: true
230
+ gradient_checkpointing_kwargs:
231
+ value: null
232
+ greater_is_better:
233
+ value: false
234
+ group_by_length:
235
+ value: false
236
+ half_precision_backend:
237
+ value: auto
238
+ hub_always_push:
239
+ value: false
240
+ hub_model_id:
241
+ value: null
242
+ hub_private_repo:
243
+ value: null
244
+ hub_strategy:
245
+ value: every_save
246
+ hub_token:
247
+ value: <HUB_TOKEN>
248
+ id2label:
249
+ value:
250
+ "0": LABEL_0
251
+ "1": LABEL_1
252
+ ignore_data_skip:
253
+ value: false
254
+ include_for_metrics:
255
+ value: []
256
+ include_inputs_for_metrics:
257
+ value: false
258
+ include_num_input_tokens_seen:
259
+ value: false
260
+ include_tokens_per_second:
261
+ value: false
262
+ init_std:
263
+ value: 0.02
264
+ is_decoder:
265
+ value: false
266
+ is_encoder_decoder:
267
+ value: true
268
+ jit_mode_eval:
269
+ value: false
270
+ label_names:
271
+ value: null
272
+ label_smoothing_factor:
273
+ value: 0
274
+ label2id:
275
+ value:
276
+ LABEL_0: 0
277
+ LABEL_1: 1
278
+ learning_rate:
279
+ value: 1e-05
280
+ length_column_name:
281
+ value: input_length
282
+ length_penalty:
283
+ value: 1
284
+ load_best_model_at_end:
285
+ value: true
286
+ local_rank:
287
+ value: 0
288
+ log_level:
289
+ value: passive
290
+ log_level_replica:
291
+ value: warning
292
+ log_on_each_node:
293
+ value: true
294
+ logging_dir:
295
+ value: ./runs/Feb12_12-17-27_tknika
296
+ logging_first_step:
297
+ value: false
298
+ logging_nan_inf_filter:
299
+ value: true
300
+ logging_steps:
301
+ value: 25
302
+ logging_strategy:
303
+ value: steps
304
+ lr_scheduler_type:
305
+ value: linear
306
+ mask_feature_length:
307
+ value: 10
308
+ mask_feature_min_masks:
309
+ value: 0
310
+ mask_feature_prob:
311
+ value: 0
312
+ mask_time_length:
313
+ value: 10
314
+ mask_time_min_masks:
315
+ value: 2
316
+ mask_time_prob:
317
+ value: 0.05
318
+ max_grad_norm:
319
+ value: 1
320
+ max_length:
321
+ value: 448
322
+ max_source_positions:
323
+ value: 1500
324
+ max_steps:
325
+ value: 8000
326
+ max_target_positions:
327
+ value: 448
328
+ median_filter_width:
329
+ value: 7
330
+ metric_for_best_model:
331
+ value: wer
332
+ min_length:
333
+ value: 0
334
+ model/num_parameters:
335
+ value: 241734912
336
+ model_type:
337
+ value: whisper
338
+ mp_parameters:
339
+ value: ""
340
+ neftune_noise_alpha:
341
+ value: null
342
+ no_cuda:
343
+ value: false
344
+ no_repeat_ngram_size:
345
+ value: 0
346
+ num_beam_groups:
347
+ value: 1
348
+ num_beams:
349
+ value: 1
350
+ num_hidden_layers:
351
+ value: 12
352
+ num_mel_bins:
353
+ value: 80
354
+ num_return_sequences:
355
+ value: 1
356
+ num_train_epochs:
357
+ value: 3
358
+ optim:
359
+ value: adamw_torch
360
+ optim_args:
361
+ value: null
362
+ optim_target_modules:
363
+ value: null
364
+ output_attentions:
365
+ value: false
366
+ output_dir:
367
+ value: ./
368
+ output_hidden_states:
369
+ value: false
370
+ output_scores:
371
+ value: false
372
+ overwrite_output_dir:
373
+ value: true
374
+ pad_token_id:
375
+ value: 50257
376
+ past_index:
377
+ value: -1
378
+ per_device_eval_batch_size:
379
+ value: 16
380
+ per_device_train_batch_size:
381
+ value: 32
382
+ per_gpu_eval_batch_size:
383
+ value: null
384
+ per_gpu_train_batch_size:
385
+ value: null
386
+ predict_with_generate:
387
+ value: true
388
+ prediction_loss_only:
389
+ value: false
390
+ prefix:
391
+ value: null
392
+ problem_type:
393
+ value: null
394
+ push_to_hub:
395
+ value: true
396
+ push_to_hub_model_id:
397
+ value: null
398
+ push_to_hub_organization:
399
+ value: null
400
+ push_to_hub_token:
401
+ value: <PUSH_TO_HUB_TOKEN>
402
+ ray_scope:
403
+ value: last
404
+ remove_invalid_values:
405
+ value: false
406
+ remove_unused_columns:
407
+ value: true
408
+ repetition_penalty:
409
+ value: 1
410
+ report_to:
411
+ value:
412
+ - wandb
413
+ restore_callback_states_from_checkpoint:
414
+ value: false
415
+ resume_from_checkpoint:
416
+ value: null
417
+ return_dict:
418
+ value: true
419
+ return_dict_in_generate:
420
+ value: false
421
+ run_name:
422
+ value: whisper-small-eu
423
+ save_on_each_node:
424
+ value: false
425
+ save_only_model:
426
+ value: false
427
+ save_safetensors:
428
+ value: true
429
+ save_steps:
430
+ value: 1000
431
+ save_strategy:
432
+ value: steps
433
+ save_total_limit:
434
+ value: null
435
+ scale_embedding:
436
+ value: false
437
+ seed:
438
+ value: 42
439
+ sep_token_id:
440
+ value: null
441
+ skip_memory_metrics:
442
+ value: true
443
+ sortish_sampler:
444
+ value: false
445
+ split_batches:
446
+ value: null
447
+ suppress_tokens:
448
+ value: null
449
+ task_specific_params:
450
+ value: null
451
+ temperature:
452
+ value: 1
453
+ tf_legacy_loss:
454
+ value: false
455
+ tf32:
456
+ value: null
457
+ tie_encoder_decoder:
458
+ value: false
459
+ tie_word_embeddings:
460
+ value: true
461
+ tokenizer_class:
462
+ value: null
463
+ top_k:
464
+ value: 50
465
+ top_p:
466
+ value: 1
467
+ torch_compile:
468
+ value: false
469
+ torch_compile_backend:
470
+ value: null
471
+ torch_compile_mode:
472
+ value: null
473
+ torch_dtype:
474
+ value: float32
475
+ torch_empty_cache_steps:
476
+ value: null
477
+ torchdynamo:
478
+ value: null
479
+ torchscript:
480
+ value: false
481
+ tpu_metrics_debug:
482
+ value: false
483
+ tpu_num_cores:
484
+ value: null
485
+ transformers_version:
486
+ value: 4.49.0.dev0
487
+ typical_p:
488
+ value: 1
489
+ use_bfloat16:
490
+ value: false
491
+ use_cache:
492
+ value: false
493
+ use_cpu:
494
+ value: false
495
+ use_ipex:
496
+ value: false
497
+ use_legacy_prediction_loop:
498
+ value: false
499
+ use_liger_kernel:
500
+ value: false
501
+ use_mps_device:
502
+ value: false
503
+ use_weighted_layer_sum:
504
+ value: false
505
+ vocab_size:
506
+ value: 51865
507
+ warmup_ratio:
508
+ value: 0
509
+ warmup_steps:
510
+ value: 500
511
+ weight_decay:
512
+ value: 0
wandb/run-20250212_121751-d4i88lzt/files/output.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
+ main()
4
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
+ return inner_training_loop(
9
+ ^^^^^^^^^^^^^^^^^^^^
10
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
+ self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
+ return self.call_event("on_epoch_begin", args, state, control)
15
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
+ result = getattr(callback, event)(
18
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
19
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
+ if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
+ ^^^^^^^^^^^^^^^^^^^^^^^^
22
+ AttributeError: 'NoneType' object has no attribute 'dataset'
wandb/run-20250212_121751-d4i88lzt/files/requirements.txt ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiosignal==1.3.2
2
+ Markdown==3.7
3
+ more-itertools==10.6.0
4
+ requests==2.32.3
5
+ sentry-sdk==2.21.0
6
+ torchaudio==2.6.0
7
+ charset-normalizer==3.4.1
8
+ docker-pycreds==0.4.0
9
+ nvidia-cusolver-cu12==11.6.1.9
10
+ PyYAML==6.0.2
11
+ librosa==0.10.2.post1
12
+ soxr==0.5.0.post1
13
+ multiprocess==0.70.16
14
+ setuptools==75.8.0
15
+ nvidia-cufft-cu12==11.2.1.3
16
+ joblib==1.4.2
17
+ pytz==2025.1
18
+ pip==24.0
19
+ scikit-learn==1.6.1
20
+ certifi==2025.1.31
21
+ jiwer==3.1.0
22
+ regex==2024.11.6
23
+ annotated-types==0.7.0
24
+ grpcio==1.70.0
25
+ msgpack==1.1.0
26
+ mpmath==1.3.0
27
+ nvidia-cudnn-cu12==9.1.0.70
28
+ soundfile==0.13.1
29
+ dill==0.3.8
30
+ nvidia-nvtx-cu12==12.4.127
31
+ six==1.17.0
32
+ nvidia-cuda-cupti-cu12==12.4.127
33
+ pyarrow==19.0.0
34
+ nvidia-nccl-cu12==2.21.5
35
+ psutil==6.1.1
36
+ decorator==5.1.1
37
+ llvmlite==0.44.0
38
+ frozenlist==1.5.0
39
+ pydantic==2.10.6
40
+ networkx==3.4.2
41
+ idna==3.10
42
+ wandb==0.19.6
43
+ aiohttp==3.11.12
44
+ RapidFuzz==3.12.1
45
+ pandas==2.2.3
46
+ python-dateutil==2.9.0.post0
47
+ numpy==2.1.3
48
+ tokenizers==0.21.0
49
+ nvidia-cusparselt-cu12==0.6.2
50
+ typing_extensions==4.12.2
51
+ urllib3==2.3.0
52
+ setproctitle==1.3.4
53
+ tzdata==2025.1
54
+ sympy==1.13.1
55
+ pooch==1.8.2
56
+ click==8.1.8
57
+ pydantic_core==2.27.2
58
+ MarkupSafe==3.0.2
59
+ scipy==1.15.1
60
+ accelerate==1.3.0
61
+ tensorboard==2.19.0
62
+ protobuf==5.29.3
63
+ gitdb==4.0.12
64
+ smmap==5.0.2
65
+ absl-py==2.1.0
66
+ tqdm==4.67.1
67
+ yarl==1.18.3
68
+ pycparser==2.22
69
+ nvidia-cusparse-cu12==12.3.1.170
70
+ attrs==25.1.0
71
+ lazy_loader==0.4
72
+ tensorboard-data-server==0.7.2
73
+ threadpoolctl==3.5.0
74
+ GitPython==3.1.44
75
+ safetensors==0.5.2
76
+ fsspec==2024.12.0
77
+ nvidia-cuda-nvrtc-cu12==12.4.127
78
+ filelock==3.17.0
79
+ aiohappyeyeballs==2.4.6
80
+ packaging==24.2
81
+ datasets==3.2.1.dev0
82
+ audioread==3.0.1
83
+ propcache==0.2.1
84
+ transformers==4.49.0.dev0
85
+ nvidia-cuda-runtime-cu12==12.4.127
86
+ cffi==1.17.1
87
+ evaluate==0.4.3
88
+ Werkzeug==3.1.3
89
+ huggingface-hub==0.28.1
90
+ Jinja2==3.1.5
91
+ torch==2.6.0
92
+ nvidia-curand-cu12==10.3.5.147
93
+ xxhash==3.5.0
94
+ platformdirs==4.3.6
95
+ multidict==6.1.0
96
+ nvidia-cublas-cu12==12.4.5.8
97
+ nvidia-nvjitlink-cu12==12.4.127
98
+ triton==3.2.0
99
+ numba==0.61.0
100
+ importlib_metadata==8.0.0
101
+ platformdirs==4.2.2
102
+ typeguard==4.3.0
103
+ more-itertools==10.3.0
104
+ tomli==2.0.1
105
+ autocommand==2.2.2
106
+ zipp==3.19.2
107
+ typing_extensions==4.12.2
108
+ backports.tarfile==1.2.0
109
+ inflect==7.3.1
110
+ jaraco.text==3.12.1
111
+ wheel==0.43.0
112
+ packaging==24.2
113
+ jaraco.collections==5.1.0
114
+ jaraco.functools==4.0.1
115
+ jaraco.context==5.3.0
wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.3",
4
+ "startedAt": "2025-02-12T12:17:51.527114Z",
5
+ "args": [
6
+ "--model_name_or_path=openai/whisper-small",
7
+ "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
+ "--language=basque",
9
+ "--train_split_name=train",
10
+ "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
11
+ "--model_index_name=Whisper Small Basque",
12
+ "--max_steps=8000",
13
+ "--output_dir=./",
14
+ "--per_device_train_batch_size=32",
15
+ "--per_device_eval_batch_size=16",
16
+ "--gradient_accumulation_steps=1",
17
+ "--logging_steps=25",
18
+ "--learning_rate=1e-5",
19
+ "--warmup_steps=500",
20
+ "--evaluation_strategy=steps",
21
+ "--eval_steps=1000",
22
+ "--save_strategy=steps",
23
+ "--save_steps=1000",
24
+ "--generation_max_length=225",
25
+ "--length_column_name=input_length",
26
+ "--max_duration_in_seconds=30",
27
+ "--text_column_name=sentence",
28
+ "--freeze_feature_encoder=False",
29
+ "--report_to=tensorboard",
30
+ "--metric_for_best_model=wer",
31
+ "--greater_is_better=False",
32
+ "--load_best_model_at_end",
33
+ "--gradient_checkpointing",
34
+ "--fp16",
35
+ "--overwrite_output_dir",
36
+ "--do_train",
37
+ "--do_eval",
38
+ "--predict_with_generate",
39
+ "--do_normalize_eval",
40
+ "--streaming",
41
+ "--use_auth_token",
42
+ "--push_to_hub",
43
+ "--report_to",
44
+ "wandb",
45
+ "--run_name",
46
+ "whisper-small-eu"
47
+ ],
48
+ "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
+ "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
+ "git": {
51
+ "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
+ "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
+ },
54
+ "email": "[email protected]",
55
+ "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
+ "host": "tknika",
57
+ "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
+ "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
+ "cpu_count": 8,
60
+ "cpu_count_logical": 8,
61
+ "gpu": "NVIDIA L40-48Q",
62
+ "gpu_count": 1,
63
+ "disk": {
64
+ "/": {
65
+ "total": "525987168256",
66
+ "used": "297346564096"
67
+ }
68
+ },
69
+ "memory": {
70
+ "total": "33654022144"
71
+ },
72
+ "cpu": {
73
+ "count": 8,
74
+ "countLogical": 8
75
+ },
76
+ "gpu_nvidia": [
77
+ {
78
+ "name": "NVIDIA L40-48Q",
79
+ "memoryTotal": "51539607552",
80
+ "cudaCores": 18176,
81
+ "architecture": "Ada"
82
+ }
83
+ ],
84
+ "cudaVersion": "12.4"
85
+ }
wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":0}}
wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
+ {"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392}
3
+ {"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}}
4
+ {"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"}
5
+ {"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
6
+ {"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
7
+ {"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"}
8
+ {"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"}
9
+ {"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"}
10
+ {"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"}
12
+ {"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"}
13
+ {"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"}
14
+ {"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"}
wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"}
2
+ {"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"}
3
+ {"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"}
4
+ {"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"}
5
+ {"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"}
6
+ {"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"}
7
+ {"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"}
9
+ {"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"}
13
+ {"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"}
14
+ {"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"}
15
+ {"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"}
wandb/run-20250212_121751-d4i88lzt/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392
3
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
7
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
8
+ 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:init():756] calling init triggers
9
+ 2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():789] starting backend
12
+ 2025-02-12 12:17:51,521 INFO MainThread:223392 [wandb_init.py:init():793] sending inform_init request
13
+ 2025-02-12 12:17:51,526 INFO MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-02-12 12:17:51,526 INFO MainThread:223392 [wandb_init.py:init():808] backend started and connected
15
+ 2025-02-12 12:17:51,528 INFO MainThread:223392 [wandb_init.py:init():901] updated telemetry
16
+ 2025-02-12 12:17:51,535 INFO MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
+ 2025-02-12 12:17:51,944 INFO MainThread:223392 [wandb_init.py:init():994] starting run threads in backend
18
+ 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg
19
+ 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
+ 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
+ 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed.
22
+ 2025-02-12 12:17:52,051 INFO MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process
23
+ 2025-02-12 12:17:52,052 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
+ 2025-02-12 12:17:52,054 INFO MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75ef87e92c00>>
25
+ 2025-02-12 12:17:52,055 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
+ 2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb ADDED
Binary file (11.3 kB). View file
 
wandb/run-20250212_122637-v3d3ouvn/files/config.yaml ADDED
@@ -0,0 +1,512 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _attn_implementation_autoset:
2
+ value: true
3
+ _name_or_path:
4
+ value: openai/whisper-small
5
+ _wandb:
6
+ value:
7
+ cli_version: 0.19.6
8
+ m:
9
+ - "1": train/global_step
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ python_version: 3.12.3
14
+ t:
15
+ "1":
16
+ - 1
17
+ - 5
18
+ - 11
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 100
25
+ "2":
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 100
35
+ "3":
36
+ - 7
37
+ - 13
38
+ - 19
39
+ - 23
40
+ - 55
41
+ - 66
42
+ "4": 3.12.3
43
+ "5": 0.19.6
44
+ "6": 4.49.0.dev0
45
+ "8":
46
+ - 5
47
+ "9":
48
+ "1": transformers_trainer
49
+ "12": 0.19.6
50
+ "13": linux-x86_64
51
+ accelerator_config:
52
+ value:
53
+ dispatch_batches: null
54
+ even_batches: true
55
+ gradient_accumulation_kwargs: null
56
+ non_blocking: false
57
+ split_batches: false
58
+ use_seedable_sampler: true
59
+ activation_dropout:
60
+ value: 0
61
+ activation_function:
62
+ value: gelu
63
+ adafactor:
64
+ value: false
65
+ adam_beta1:
66
+ value: 0.9
67
+ adam_beta2:
68
+ value: 0.999
69
+ adam_epsilon:
70
+ value: 1e-08
71
+ add_cross_attention:
72
+ value: false
73
+ apply_spec_augment:
74
+ value: false
75
+ architectures:
76
+ value:
77
+ - WhisperForConditionalGeneration
78
+ attention_dropout:
79
+ value: 0
80
+ auto_find_batch_size:
81
+ value: false
82
+ average_tokens_across_devices:
83
+ value: false
84
+ bad_words_ids:
85
+ value: null
86
+ batch_eval_metrics:
87
+ value: false
88
+ begin_suppress_tokens:
89
+ value:
90
+ - 220
91
+ - 50257
92
+ bf16:
93
+ value: false
94
+ bf16_full_eval:
95
+ value: false
96
+ bos_token_id:
97
+ value: 50257
98
+ chunk_size_feed_forward:
99
+ value: 0
100
+ classifier_proj_size:
101
+ value: 256
102
+ cross_attention_hidden_size:
103
+ value: null
104
+ d_model:
105
+ value: 768
106
+ data_seed:
107
+ value: null
108
+ dataloader_drop_last:
109
+ value: false
110
+ dataloader_num_workers:
111
+ value: 0
112
+ dataloader_persistent_workers:
113
+ value: false
114
+ dataloader_pin_memory:
115
+ value: true
116
+ dataloader_prefetch_factor:
117
+ value: null
118
+ ddp_backend:
119
+ value: null
120
+ ddp_broadcast_buffers:
121
+ value: null
122
+ ddp_bucket_cap_mb:
123
+ value: null
124
+ ddp_find_unused_parameters:
125
+ value: null
126
+ ddp_timeout:
127
+ value: 1800
128
+ debug:
129
+ value: []
130
+ decoder_attention_heads:
131
+ value: 12
132
+ decoder_ffn_dim:
133
+ value: 3072
134
+ decoder_layerdrop:
135
+ value: 0
136
+ decoder_layers:
137
+ value: 12
138
+ decoder_start_token_id:
139
+ value: 50258
140
+ deepspeed:
141
+ value: null
142
+ disable_tqdm:
143
+ value: false
144
+ dispatch_batches:
145
+ value: null
146
+ diversity_penalty:
147
+ value: 0
148
+ do_eval:
149
+ value: true
150
+ do_predict:
151
+ value: false
152
+ do_sample:
153
+ value: false
154
+ do_train:
155
+ value: true
156
+ dropout:
157
+ value: 0
158
+ early_stopping:
159
+ value: false
160
+ encoder_attention_heads:
161
+ value: 12
162
+ encoder_ffn_dim:
163
+ value: 3072
164
+ encoder_layerdrop:
165
+ value: 0
166
+ encoder_layers:
167
+ value: 12
168
+ encoder_no_repeat_ngram_size:
169
+ value: 0
170
+ eos_token_id:
171
+ value: 50257
172
+ eval_accumulation_steps:
173
+ value: null
174
+ eval_delay:
175
+ value: 0
176
+ eval_do_concat_batches:
177
+ value: true
178
+ eval_on_start:
179
+ value: false
180
+ eval_steps:
181
+ value: 1000
182
+ eval_strategy:
183
+ value: steps
184
+ eval_use_gather_object:
185
+ value: false
186
+ evaluation_strategy:
187
+ value: steps
188
+ exponential_decay_length_penalty:
189
+ value: null
190
+ finetuning_task:
191
+ value: null
192
+ forced_bos_token_id:
193
+ value: null
194
+ forced_decoder_ids:
195
+ value: null
196
+ forced_eos_token_id:
197
+ value: null
198
+ fp16:
199
+ value: true
200
+ fp16_backend:
201
+ value: auto
202
+ fp16_full_eval:
203
+ value: false
204
+ fp16_opt_level:
205
+ value: O1
206
+ fsdp:
207
+ value: []
208
+ fsdp_config:
209
+ value:
210
+ min_num_params: 0
211
+ xla: false
212
+ xla_fsdp_grad_ckpt: false
213
+ xla_fsdp_v2: false
214
+ fsdp_min_num_params:
215
+ value: 0
216
+ fsdp_transformer_layer_cls_to_wrap:
217
+ value: null
218
+ full_determinism:
219
+ value: false
220
+ generation_config:
221
+ value: null
222
+ generation_max_length:
223
+ value: 225
224
+ generation_num_beams:
225
+ value: null
226
+ gradient_accumulation_steps:
227
+ value: 1
228
+ gradient_checkpointing:
229
+ value: true
230
+ gradient_checkpointing_kwargs:
231
+ value: null
232
+ greater_is_better:
233
+ value: false
234
+ group_by_length:
235
+ value: false
236
+ half_precision_backend:
237
+ value: auto
238
+ hub_always_push:
239
+ value: false
240
+ hub_model_id:
241
+ value: null
242
+ hub_private_repo:
243
+ value: null
244
+ hub_strategy:
245
+ value: every_save
246
+ hub_token:
247
+ value: <HUB_TOKEN>
248
+ id2label:
249
+ value:
250
+ "0": LABEL_0
251
+ "1": LABEL_1
252
+ ignore_data_skip:
253
+ value: false
254
+ include_for_metrics:
255
+ value: []
256
+ include_inputs_for_metrics:
257
+ value: false
258
+ include_num_input_tokens_seen:
259
+ value: false
260
+ include_tokens_per_second:
261
+ value: false
262
+ init_std:
263
+ value: 0.02
264
+ is_decoder:
265
+ value: false
266
+ is_encoder_decoder:
267
+ value: true
268
+ jit_mode_eval:
269
+ value: false
270
+ label_names:
271
+ value: null
272
+ label_smoothing_factor:
273
+ value: 0
274
+ label2id:
275
+ value:
276
+ LABEL_0: 0
277
+ LABEL_1: 1
278
+ learning_rate:
279
+ value: 1e-05
280
+ length_column_name:
281
+ value: input_length
282
+ length_penalty:
283
+ value: 1
284
+ load_best_model_at_end:
285
+ value: true
286
+ local_rank:
287
+ value: 0
288
+ log_level:
289
+ value: passive
290
+ log_level_replica:
291
+ value: warning
292
+ log_on_each_node:
293
+ value: true
294
+ logging_dir:
295
+ value: ./runs/Feb12_12-26-11_tknika
296
+ logging_first_step:
297
+ value: false
298
+ logging_nan_inf_filter:
299
+ value: true
300
+ logging_steps:
301
+ value: 25
302
+ logging_strategy:
303
+ value: steps
304
+ lr_scheduler_type:
305
+ value: linear
306
+ mask_feature_length:
307
+ value: 10
308
+ mask_feature_min_masks:
309
+ value: 0
310
+ mask_feature_prob:
311
+ value: 0
312
+ mask_time_length:
313
+ value: 10
314
+ mask_time_min_masks:
315
+ value: 2
316
+ mask_time_prob:
317
+ value: 0.05
318
+ max_grad_norm:
319
+ value: 1
320
+ max_length:
321
+ value: 448
322
+ max_source_positions:
323
+ value: 1500
324
+ max_steps:
325
+ value: 8000
326
+ max_target_positions:
327
+ value: 448
328
+ median_filter_width:
329
+ value: 7
330
+ metric_for_best_model:
331
+ value: wer
332
+ min_length:
333
+ value: 0
334
+ model/num_parameters:
335
+ value: 241734912
336
+ model_type:
337
+ value: whisper
338
+ mp_parameters:
339
+ value: ""
340
+ neftune_noise_alpha:
341
+ value: null
342
+ no_cuda:
343
+ value: false
344
+ no_repeat_ngram_size:
345
+ value: 0
346
+ num_beam_groups:
347
+ value: 1
348
+ num_beams:
349
+ value: 1
350
+ num_hidden_layers:
351
+ value: 12
352
+ num_mel_bins:
353
+ value: 80
354
+ num_return_sequences:
355
+ value: 1
356
+ num_train_epochs:
357
+ value: 3
358
+ optim:
359
+ value: adamw_torch
360
+ optim_args:
361
+ value: null
362
+ optim_target_modules:
363
+ value: null
364
+ output_attentions:
365
+ value: false
366
+ output_dir:
367
+ value: ./
368
+ output_hidden_states:
369
+ value: false
370
+ output_scores:
371
+ value: false
372
+ overwrite_output_dir:
373
+ value: true
374
+ pad_token_id:
375
+ value: 50257
376
+ past_index:
377
+ value: -1
378
+ per_device_eval_batch_size:
379
+ value: 16
380
+ per_device_train_batch_size:
381
+ value: 32
382
+ per_gpu_eval_batch_size:
383
+ value: null
384
+ per_gpu_train_batch_size:
385
+ value: null
386
+ predict_with_generate:
387
+ value: true
388
+ prediction_loss_only:
389
+ value: false
390
+ prefix:
391
+ value: null
392
+ problem_type:
393
+ value: null
394
+ push_to_hub:
395
+ value: true
396
+ push_to_hub_model_id:
397
+ value: null
398
+ push_to_hub_organization:
399
+ value: null
400
+ push_to_hub_token:
401
+ value: <PUSH_TO_HUB_TOKEN>
402
+ ray_scope:
403
+ value: last
404
+ remove_invalid_values:
405
+ value: false
406
+ remove_unused_columns:
407
+ value: true
408
+ repetition_penalty:
409
+ value: 1
410
+ report_to:
411
+ value:
412
+ - wandb
413
+ restore_callback_states_from_checkpoint:
414
+ value: false
415
+ resume_from_checkpoint:
416
+ value: null
417
+ return_dict:
418
+ value: true
419
+ return_dict_in_generate:
420
+ value: false
421
+ run_name:
422
+ value: whisper-small-eu
423
+ save_on_each_node:
424
+ value: false
425
+ save_only_model:
426
+ value: false
427
+ save_safetensors:
428
+ value: true
429
+ save_steps:
430
+ value: 1000
431
+ save_strategy:
432
+ value: steps
433
+ save_total_limit:
434
+ value: null
435
+ scale_embedding:
436
+ value: false
437
+ seed:
438
+ value: 42
439
+ sep_token_id:
440
+ value: null
441
+ skip_memory_metrics:
442
+ value: true
443
+ sortish_sampler:
444
+ value: false
445
+ split_batches:
446
+ value: null
447
+ suppress_tokens:
448
+ value: null
449
+ task_specific_params:
450
+ value: null
451
+ temperature:
452
+ value: 1
453
+ tf_legacy_loss:
454
+ value: false
455
+ tf32:
456
+ value: null
457
+ tie_encoder_decoder:
458
+ value: false
459
+ tie_word_embeddings:
460
+ value: true
461
+ tokenizer_class:
462
+ value: null
463
+ top_k:
464
+ value: 50
465
+ top_p:
466
+ value: 1
467
+ torch_compile:
468
+ value: false
469
+ torch_compile_backend:
470
+ value: null
471
+ torch_compile_mode:
472
+ value: null
473
+ torch_dtype:
474
+ value: float32
475
+ torch_empty_cache_steps:
476
+ value: null
477
+ torchdynamo:
478
+ value: null
479
+ torchscript:
480
+ value: false
481
+ tpu_metrics_debug:
482
+ value: false
483
+ tpu_num_cores:
484
+ value: null
485
+ transformers_version:
486
+ value: 4.49.0.dev0
487
+ typical_p:
488
+ value: 1
489
+ use_bfloat16:
490
+ value: false
491
+ use_cache:
492
+ value: false
493
+ use_cpu:
494
+ value: false
495
+ use_ipex:
496
+ value: false
497
+ use_legacy_prediction_loop:
498
+ value: false
499
+ use_liger_kernel:
500
+ value: false
501
+ use_mps_device:
502
+ value: false
503
+ use_weighted_layer_sum:
504
+ value: false
505
+ vocab_size:
506
+ value: 51865
507
+ warmup_ratio:
508
+ value: 0
509
+ warmup_steps:
510
+ value: 500
511
+ weight_decay:
512
+ value: 0
wandb/run-20250212_122637-v3d3ouvn/files/output.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
+ main()
4
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
+ return inner_training_loop(
9
+ ^^^^^^^^^^^^^^^^^^^^
10
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
+ self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
+ return self.call_event("on_epoch_begin", args, state, control)
15
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
+ result = getattr(callback, event)(
18
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
19
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
+ if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
+ ^^^^^^^^^^^^^^^^^^^^^^^^
22
+ AttributeError: 'NoneType' object has no attribute 'dataset'
wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiosignal==1.3.2
2
+ Markdown==3.7
3
+ more-itertools==10.6.0
4
+ requests==2.32.3
5
+ sentry-sdk==2.21.0
6
+ torchaudio==2.6.0
7
+ charset-normalizer==3.4.1
8
+ docker-pycreds==0.4.0
9
+ nvidia-cusolver-cu12==11.6.1.9
10
+ PyYAML==6.0.2
11
+ librosa==0.10.2.post1
12
+ soxr==0.5.0.post1
13
+ multiprocess==0.70.16
14
+ setuptools==75.8.0
15
+ nvidia-cufft-cu12==11.2.1.3
16
+ joblib==1.4.2
17
+ pytz==2025.1
18
+ pip==24.0
19
+ scikit-learn==1.6.1
20
+ certifi==2025.1.31
21
+ jiwer==3.1.0
22
+ regex==2024.11.6
23
+ annotated-types==0.7.0
24
+ grpcio==1.70.0
25
+ msgpack==1.1.0
26
+ mpmath==1.3.0
27
+ nvidia-cudnn-cu12==9.1.0.70
28
+ soundfile==0.13.1
29
+ dill==0.3.8
30
+ nvidia-nvtx-cu12==12.4.127
31
+ six==1.17.0
32
+ nvidia-cuda-cupti-cu12==12.4.127
33
+ pyarrow==19.0.0
34
+ nvidia-nccl-cu12==2.21.5
35
+ psutil==6.1.1
36
+ decorator==5.1.1
37
+ llvmlite==0.44.0
38
+ frozenlist==1.5.0
39
+ pydantic==2.10.6
40
+ networkx==3.4.2
41
+ idna==3.10
42
+ wandb==0.19.6
43
+ aiohttp==3.11.12
44
+ RapidFuzz==3.12.1
45
+ pandas==2.2.3
46
+ python-dateutil==2.9.0.post0
47
+ numpy==2.1.3
48
+ tokenizers==0.21.0
49
+ nvidia-cusparselt-cu12==0.6.2
50
+ typing_extensions==4.12.2
51
+ urllib3==2.3.0
52
+ setproctitle==1.3.4
53
+ tzdata==2025.1
54
+ sympy==1.13.1
55
+ pooch==1.8.2
56
+ click==8.1.8
57
+ pydantic_core==2.27.2
58
+ MarkupSafe==3.0.2
59
+ scipy==1.15.1
60
+ accelerate==1.3.0
61
+ tensorboard==2.19.0
62
+ protobuf==5.29.3
63
+ gitdb==4.0.12
64
+ smmap==5.0.2
65
+ absl-py==2.1.0
66
+ tqdm==4.67.1
67
+ yarl==1.18.3
68
+ pycparser==2.22
69
+ nvidia-cusparse-cu12==12.3.1.170
70
+ attrs==25.1.0
71
+ lazy_loader==0.4
72
+ tensorboard-data-server==0.7.2
73
+ threadpoolctl==3.5.0
74
+ GitPython==3.1.44
75
+ safetensors==0.5.2
76
+ fsspec==2024.12.0
77
+ nvidia-cuda-nvrtc-cu12==12.4.127
78
+ filelock==3.17.0
79
+ aiohappyeyeballs==2.4.6
80
+ packaging==24.2
81
+ datasets==3.2.1.dev0
82
+ audioread==3.0.1
83
+ propcache==0.2.1
84
+ transformers==4.49.0.dev0
85
+ nvidia-cuda-runtime-cu12==12.4.127
86
+ cffi==1.17.1
87
+ evaluate==0.4.3
88
+ Werkzeug==3.1.3
89
+ huggingface-hub==0.28.1
90
+ Jinja2==3.1.5
91
+ torch==2.6.0
92
+ nvidia-curand-cu12==10.3.5.147
93
+ xxhash==3.5.0
94
+ platformdirs==4.3.6
95
+ multidict==6.1.0
96
+ nvidia-cublas-cu12==12.4.5.8
97
+ nvidia-nvjitlink-cu12==12.4.127
98
+ triton==3.2.0
99
+ numba==0.61.0
100
+ importlib_metadata==8.0.0
101
+ platformdirs==4.2.2
102
+ typeguard==4.3.0
103
+ more-itertools==10.3.0
104
+ tomli==2.0.1
105
+ autocommand==2.2.2
106
+ zipp==3.19.2
107
+ typing_extensions==4.12.2
108
+ backports.tarfile==1.2.0
109
+ inflect==7.3.1
110
+ jaraco.text==3.12.1
111
+ wheel==0.43.0
112
+ packaging==24.2
113
+ jaraco.collections==5.1.0
114
+ jaraco.functools==4.0.1
115
+ jaraco.context==5.3.0
wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.3",
4
+ "startedAt": "2025-02-12T12:26:37.277902Z",
5
+ "args": [
6
+ "--model_name_or_path=openai/whisper-small",
7
+ "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
+ "--language=basque",
9
+ "--train_split_name=train",
10
+ "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
11
+ "--model_index_name=Whisper Small Basque",
12
+ "--max_steps=8000",
13
+ "--output_dir=./",
14
+ "--per_device_train_batch_size=32",
15
+ "--per_device_eval_batch_size=16",
16
+ "--gradient_accumulation_steps=1",
17
+ "--logging_steps=25",
18
+ "--learning_rate=1e-5",
19
+ "--warmup_steps=500",
20
+ "--evaluation_strategy=steps",
21
+ "--eval_steps=1000",
22
+ "--save_strategy=steps",
23
+ "--save_steps=1000",
24
+ "--generation_max_length=225",
25
+ "--length_column_name=input_length",
26
+ "--max_duration_in_seconds=30",
27
+ "--text_column_name=sentence",
28
+ "--freeze_feature_encoder=False",
29
+ "--report_to=tensorboard",
30
+ "--metric_for_best_model=wer",
31
+ "--greater_is_better=False",
32
+ "--load_best_model_at_end",
33
+ "--gradient_checkpointing",
34
+ "--fp16",
35
+ "--overwrite_output_dir",
36
+ "--do_train",
37
+ "--do_eval",
38
+ "--predict_with_generate",
39
+ "--do_normalize_eval",
40
+ "--streaming",
41
+ "--use_auth_token",
42
+ "--push_to_hub",
43
+ "--report_to",
44
+ "wandb",
45
+ "--run_name",
46
+ "whisper-small-eu"
47
+ ],
48
+ "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
+ "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
+ "git": {
51
+ "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
+ "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
+ },
54
+ "email": "[email protected]",
55
+ "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
+ "host": "tknika",
57
+ "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
+ "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
+ "cpu_count": 8,
60
+ "cpu_count_logical": 8,
61
+ "gpu": "NVIDIA L40-48Q",
62
+ "gpu_count": 1,
63
+ "disk": {
64
+ "/": {
65
+ "total": "525987168256",
66
+ "used": "297346666496"
67
+ }
68
+ },
69
+ "memory": {
70
+ "total": "33654022144"
71
+ },
72
+ "cpu": {
73
+ "count": 8,
74
+ "countLogical": 8
75
+ },
76
+ "gpu_nvidia": [
77
+ {
78
+ "name": "NVIDIA L40-48Q",
79
+ "memoryTotal": "51539607552",
80
+ "cudaCores": 18176,
81
+ "architecture": "Ada"
82
+ }
83
+ ],
84
+ "cudaVersion": "12.4"
85
+ }
wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":0}}
wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
+ {"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110}
3
+ {"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}}
4
+ {"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"}
5
+ {"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
6
+ {"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
7
+ {"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"}
8
+ {"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"}
10
+ {"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"}
11
+ {"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"}
12
+ {"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"}
13
+ {"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"}
14
+ {"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"}
wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"}
2
+ {"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"}
3
+ {"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"}
4
+ {"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"}
5
+ {"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"}
6
+ {"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"}
7
+ {"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"}
9
+ {"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"}
13
+ {"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"}
14
+ {"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"}
15
+ {"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"}
wandb/run-20250212_122637-v3d3ouvn/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110
3
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
7
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
8
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():756] calling init triggers
9
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():789] starting backend
12
+ 2025-02-12 12:26:37,272 INFO MainThread:224110 [wandb_init.py:init():793] sending inform_init request
13
+ 2025-02-12 12:26:37,277 INFO MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-02-12 12:26:37,277 INFO MainThread:224110 [wandb_init.py:init():808] backend started and connected
15
+ 2025-02-12 12:26:37,279 INFO MainThread:224110 [wandb_init.py:init():901] updated telemetry
16
+ 2025-02-12 12:26:37,285 INFO MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
+ 2025-02-12 12:26:37,653 INFO MainThread:224110 [wandb_init.py:init():994] starting run threads in backend
18
+ 2025-02-12 12:26:37,764 INFO MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg
19
+ 2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
+ 2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
+ 2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed.
22
+ 2025-02-12 12:26:37,766 INFO MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process
23
+ 2025-02-12 12:26:37,767 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
+ 2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7a5cbc15a330>>
25
+ 2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
+ 2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb ADDED
Binary file (11.3 kB). View file
 
wandb/run-20250212_122854-4m048f5s/files/config.yaml ADDED
@@ -0,0 +1,512 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _attn_implementation_autoset:
2
+ value: true
3
+ _name_or_path:
4
+ value: openai/whisper-small
5
+ _wandb:
6
+ value:
7
+ cli_version: 0.19.6
8
+ m:
9
+ - "1": train/global_step
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ python_version: 3.12.3
14
+ t:
15
+ "1":
16
+ - 1
17
+ - 5
18
+ - 11
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 100
25
+ "2":
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 100
35
+ "3":
36
+ - 7
37
+ - 13
38
+ - 19
39
+ - 23
40
+ - 55
41
+ - 66
42
+ "4": 3.12.3
43
+ "5": 0.19.6
44
+ "6": 4.49.0.dev0
45
+ "8":
46
+ - 5
47
+ "9":
48
+ "1": transformers_trainer
49
+ "12": 0.19.6
50
+ "13": linux-x86_64
51
+ accelerator_config:
52
+ value:
53
+ dispatch_batches: null
54
+ even_batches: true
55
+ gradient_accumulation_kwargs: null
56
+ non_blocking: false
57
+ split_batches: false
58
+ use_seedable_sampler: true
59
+ activation_dropout:
60
+ value: 0
61
+ activation_function:
62
+ value: gelu
63
+ adafactor:
64
+ value: false
65
+ adam_beta1:
66
+ value: 0.9
67
+ adam_beta2:
68
+ value: 0.999
69
+ adam_epsilon:
70
+ value: 1e-08
71
+ add_cross_attention:
72
+ value: false
73
+ apply_spec_augment:
74
+ value: false
75
+ architectures:
76
+ value:
77
+ - WhisperForConditionalGeneration
78
+ attention_dropout:
79
+ value: 0
80
+ auto_find_batch_size:
81
+ value: false
82
+ average_tokens_across_devices:
83
+ value: false
84
+ bad_words_ids:
85
+ value: null
86
+ batch_eval_metrics:
87
+ value: false
88
+ begin_suppress_tokens:
89
+ value:
90
+ - 220
91
+ - 50257
92
+ bf16:
93
+ value: false
94
+ bf16_full_eval:
95
+ value: false
96
+ bos_token_id:
97
+ value: 50257
98
+ chunk_size_feed_forward:
99
+ value: 0
100
+ classifier_proj_size:
101
+ value: 256
102
+ cross_attention_hidden_size:
103
+ value: null
104
+ d_model:
105
+ value: 768
106
+ data_seed:
107
+ value: null
108
+ dataloader_drop_last:
109
+ value: false
110
+ dataloader_num_workers:
111
+ value: 0
112
+ dataloader_persistent_workers:
113
+ value: false
114
+ dataloader_pin_memory:
115
+ value: true
116
+ dataloader_prefetch_factor:
117
+ value: null
118
+ ddp_backend:
119
+ value: null
120
+ ddp_broadcast_buffers:
121
+ value: null
122
+ ddp_bucket_cap_mb:
123
+ value: null
124
+ ddp_find_unused_parameters:
125
+ value: null
126
+ ddp_timeout:
127
+ value: 1800
128
+ debug:
129
+ value: []
130
+ decoder_attention_heads:
131
+ value: 12
132
+ decoder_ffn_dim:
133
+ value: 3072
134
+ decoder_layerdrop:
135
+ value: 0
136
+ decoder_layers:
137
+ value: 12
138
+ decoder_start_token_id:
139
+ value: 50258
140
+ deepspeed:
141
+ value: null
142
+ disable_tqdm:
143
+ value: false
144
+ dispatch_batches:
145
+ value: null
146
+ diversity_penalty:
147
+ value: 0
148
+ do_eval:
149
+ value: true
150
+ do_predict:
151
+ value: false
152
+ do_sample:
153
+ value: false
154
+ do_train:
155
+ value: true
156
+ dropout:
157
+ value: 0
158
+ early_stopping:
159
+ value: false
160
+ encoder_attention_heads:
161
+ value: 12
162
+ encoder_ffn_dim:
163
+ value: 3072
164
+ encoder_layerdrop:
165
+ value: 0
166
+ encoder_layers:
167
+ value: 12
168
+ encoder_no_repeat_ngram_size:
169
+ value: 0
170
+ eos_token_id:
171
+ value: 50257
172
+ eval_accumulation_steps:
173
+ value: null
174
+ eval_delay:
175
+ value: 0
176
+ eval_do_concat_batches:
177
+ value: true
178
+ eval_on_start:
179
+ value: false
180
+ eval_steps:
181
+ value: 1000
182
+ eval_strategy:
183
+ value: steps
184
+ eval_use_gather_object:
185
+ value: false
186
+ evaluation_strategy:
187
+ value: steps
188
+ exponential_decay_length_penalty:
189
+ value: null
190
+ finetuning_task:
191
+ value: null
192
+ forced_bos_token_id:
193
+ value: null
194
+ forced_decoder_ids:
195
+ value: null
196
+ forced_eos_token_id:
197
+ value: null
198
+ fp16:
199
+ value: true
200
+ fp16_backend:
201
+ value: auto
202
+ fp16_full_eval:
203
+ value: false
204
+ fp16_opt_level:
205
+ value: O1
206
+ fsdp:
207
+ value: []
208
+ fsdp_config:
209
+ value:
210
+ min_num_params: 0
211
+ xla: false
212
+ xla_fsdp_grad_ckpt: false
213
+ xla_fsdp_v2: false
214
+ fsdp_min_num_params:
215
+ value: 0
216
+ fsdp_transformer_layer_cls_to_wrap:
217
+ value: null
218
+ full_determinism:
219
+ value: false
220
+ generation_config:
221
+ value: null
222
+ generation_max_length:
223
+ value: 225
224
+ generation_num_beams:
225
+ value: null
226
+ gradient_accumulation_steps:
227
+ value: 1
228
+ gradient_checkpointing:
229
+ value: true
230
+ gradient_checkpointing_kwargs:
231
+ value: null
232
+ greater_is_better:
233
+ value: false
234
+ group_by_length:
235
+ value: false
236
+ half_precision_backend:
237
+ value: auto
238
+ hub_always_push:
239
+ value: false
240
+ hub_model_id:
241
+ value: null
242
+ hub_private_repo:
243
+ value: null
244
+ hub_strategy:
245
+ value: every_save
246
+ hub_token:
247
+ value: <HUB_TOKEN>
248
+ id2label:
249
+ value:
250
+ "0": LABEL_0
251
+ "1": LABEL_1
252
+ ignore_data_skip:
253
+ value: false
254
+ include_for_metrics:
255
+ value: []
256
+ include_inputs_for_metrics:
257
+ value: false
258
+ include_num_input_tokens_seen:
259
+ value: false
260
+ include_tokens_per_second:
261
+ value: false
262
+ init_std:
263
+ value: 0.02
264
+ is_decoder:
265
+ value: false
266
+ is_encoder_decoder:
267
+ value: true
268
+ jit_mode_eval:
269
+ value: false
270
+ label_names:
271
+ value: null
272
+ label_smoothing_factor:
273
+ value: 0
274
+ label2id:
275
+ value:
276
+ LABEL_0: 0
277
+ LABEL_1: 1
278
+ learning_rate:
279
+ value: 1e-05
280
+ length_column_name:
281
+ value: input_length
282
+ length_penalty:
283
+ value: 1
284
+ load_best_model_at_end:
285
+ value: true
286
+ local_rank:
287
+ value: 0
288
+ log_level:
289
+ value: passive
290
+ log_level_replica:
291
+ value: warning
292
+ log_on_each_node:
293
+ value: true
294
+ logging_dir:
295
+ value: ./runs/Feb12_12-28-29_tknika
296
+ logging_first_step:
297
+ value: false
298
+ logging_nan_inf_filter:
299
+ value: true
300
+ logging_steps:
301
+ value: 25
302
+ logging_strategy:
303
+ value: steps
304
+ lr_scheduler_type:
305
+ value: linear
306
+ mask_feature_length:
307
+ value: 10
308
+ mask_feature_min_masks:
309
+ value: 0
310
+ mask_feature_prob:
311
+ value: 0
312
+ mask_time_length:
313
+ value: 10
314
+ mask_time_min_masks:
315
+ value: 2
316
+ mask_time_prob:
317
+ value: 0.05
318
+ max_grad_norm:
319
+ value: 1
320
+ max_length:
321
+ value: 448
322
+ max_source_positions:
323
+ value: 1500
324
+ max_steps:
325
+ value: 8000
326
+ max_target_positions:
327
+ value: 448
328
+ median_filter_width:
329
+ value: 7
330
+ metric_for_best_model:
331
+ value: wer
332
+ min_length:
333
+ value: 0
334
+ model/num_parameters:
335
+ value: 241734912
336
+ model_type:
337
+ value: whisper
338
+ mp_parameters:
339
+ value: ""
340
+ neftune_noise_alpha:
341
+ value: null
342
+ no_cuda:
343
+ value: false
344
+ no_repeat_ngram_size:
345
+ value: 0
346
+ num_beam_groups:
347
+ value: 1
348
+ num_beams:
349
+ value: 1
350
+ num_hidden_layers:
351
+ value: 12
352
+ num_mel_bins:
353
+ value: 80
354
+ num_return_sequences:
355
+ value: 1
356
+ num_train_epochs:
357
+ value: 3
358
+ optim:
359
+ value: adamw_torch
360
+ optim_args:
361
+ value: null
362
+ optim_target_modules:
363
+ value: null
364
+ output_attentions:
365
+ value: false
366
+ output_dir:
367
+ value: ./
368
+ output_hidden_states:
369
+ value: false
370
+ output_scores:
371
+ value: false
372
+ overwrite_output_dir:
373
+ value: true
374
+ pad_token_id:
375
+ value: 50257
376
+ past_index:
377
+ value: -1
378
+ per_device_eval_batch_size:
379
+ value: 16
380
+ per_device_train_batch_size:
381
+ value: 32
382
+ per_gpu_eval_batch_size:
383
+ value: null
384
+ per_gpu_train_batch_size:
385
+ value: null
386
+ predict_with_generate:
387
+ value: true
388
+ prediction_loss_only:
389
+ value: false
390
+ prefix:
391
+ value: null
392
+ problem_type:
393
+ value: null
394
+ push_to_hub:
395
+ value: true
396
+ push_to_hub_model_id:
397
+ value: null
398
+ push_to_hub_organization:
399
+ value: null
400
+ push_to_hub_token:
401
+ value: <PUSH_TO_HUB_TOKEN>
402
+ ray_scope:
403
+ value: last
404
+ remove_invalid_values:
405
+ value: false
406
+ remove_unused_columns:
407
+ value: true
408
+ repetition_penalty:
409
+ value: 1
410
+ report_to:
411
+ value:
412
+ - wandb
413
+ restore_callback_states_from_checkpoint:
414
+ value: false
415
+ resume_from_checkpoint:
416
+ value: null
417
+ return_dict:
418
+ value: true
419
+ return_dict_in_generate:
420
+ value: false
421
+ run_name:
422
+ value: whisper-small-eu
423
+ save_on_each_node:
424
+ value: false
425
+ save_only_model:
426
+ value: false
427
+ save_safetensors:
428
+ value: true
429
+ save_steps:
430
+ value: 1000
431
+ save_strategy:
432
+ value: steps
433
+ save_total_limit:
434
+ value: null
435
+ scale_embedding:
436
+ value: false
437
+ seed:
438
+ value: 42
439
+ sep_token_id:
440
+ value: null
441
+ skip_memory_metrics:
442
+ value: true
443
+ sortish_sampler:
444
+ value: false
445
+ split_batches:
446
+ value: null
447
+ suppress_tokens:
448
+ value: null
449
+ task_specific_params:
450
+ value: null
451
+ temperature:
452
+ value: 1
453
+ tf_legacy_loss:
454
+ value: false
455
+ tf32:
456
+ value: null
457
+ tie_encoder_decoder:
458
+ value: false
459
+ tie_word_embeddings:
460
+ value: true
461
+ tokenizer_class:
462
+ value: null
463
+ top_k:
464
+ value: 50
465
+ top_p:
466
+ value: 1
467
+ torch_compile:
468
+ value: false
469
+ torch_compile_backend:
470
+ value: null
471
+ torch_compile_mode:
472
+ value: null
473
+ torch_dtype:
474
+ value: float32
475
+ torch_empty_cache_steps:
476
+ value: null
477
+ torchdynamo:
478
+ value: null
479
+ torchscript:
480
+ value: false
481
+ tpu_metrics_debug:
482
+ value: false
483
+ tpu_num_cores:
484
+ value: null
485
+ transformers_version:
486
+ value: 4.49.0.dev0
487
+ typical_p:
488
+ value: 1
489
+ use_bfloat16:
490
+ value: false
491
+ use_cache:
492
+ value: false
493
+ use_cpu:
494
+ value: false
495
+ use_ipex:
496
+ value: false
497
+ use_legacy_prediction_loop:
498
+ value: false
499
+ use_liger_kernel:
500
+ value: false
501
+ use_mps_device:
502
+ value: false
503
+ use_weighted_layer_sum:
504
+ value: false
505
+ vocab_size:
506
+ value: 51865
507
+ warmup_ratio:
508
+ value: 0
509
+ warmup_steps:
510
+ value: 500
511
+ weight_decay:
512
+ value: 0
wandb/run-20250212_122854-4m048f5s/files/output.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
+ main()
4
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
+ return inner_training_loop(
9
+ ^^^^^^^^^^^^^^^^^^^^
10
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
+ self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
+ return self.call_event("on_epoch_begin", args, state, control)
15
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
+ result = getattr(callback, event)(
18
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
19
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
+ if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
+ ^^^^^^^^^^^^^^^^^^^^^^^^
22
+ AttributeError: 'NoneType' object has no attribute 'dataset'
wandb/run-20250212_122854-4m048f5s/files/requirements.txt ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiosignal==1.3.2
2
+ Markdown==3.7
3
+ more-itertools==10.6.0
4
+ requests==2.32.3
5
+ sentry-sdk==2.21.0
6
+ torchaudio==2.6.0
7
+ charset-normalizer==3.4.1
8
+ docker-pycreds==0.4.0
9
+ nvidia-cusolver-cu12==11.6.1.9
10
+ PyYAML==6.0.2
11
+ librosa==0.10.2.post1
12
+ soxr==0.5.0.post1
13
+ multiprocess==0.70.16
14
+ setuptools==75.8.0
15
+ nvidia-cufft-cu12==11.2.1.3
16
+ joblib==1.4.2
17
+ pytz==2025.1
18
+ pip==24.0
19
+ scikit-learn==1.6.1
20
+ certifi==2025.1.31
21
+ jiwer==3.1.0
22
+ regex==2024.11.6
23
+ annotated-types==0.7.0
24
+ grpcio==1.70.0
25
+ msgpack==1.1.0
26
+ mpmath==1.3.0
27
+ nvidia-cudnn-cu12==9.1.0.70
28
+ soundfile==0.13.1
29
+ dill==0.3.8
30
+ nvidia-nvtx-cu12==12.4.127
31
+ six==1.17.0
32
+ nvidia-cuda-cupti-cu12==12.4.127
33
+ pyarrow==19.0.0
34
+ nvidia-nccl-cu12==2.21.5
35
+ psutil==6.1.1
36
+ decorator==5.1.1
37
+ llvmlite==0.44.0
38
+ frozenlist==1.5.0
39
+ pydantic==2.10.6
40
+ networkx==3.4.2
41
+ idna==3.10
42
+ wandb==0.19.6
43
+ aiohttp==3.11.12
44
+ RapidFuzz==3.12.1
45
+ pandas==2.2.3
46
+ python-dateutil==2.9.0.post0
47
+ numpy==2.1.3
48
+ tokenizers==0.21.0
49
+ nvidia-cusparselt-cu12==0.6.2
50
+ typing_extensions==4.12.2
51
+ urllib3==2.3.0
52
+ setproctitle==1.3.4
53
+ tzdata==2025.1
54
+ sympy==1.13.1
55
+ pooch==1.8.2
56
+ click==8.1.8
57
+ pydantic_core==2.27.2
58
+ MarkupSafe==3.0.2
59
+ scipy==1.15.1
60
+ accelerate==1.3.0
61
+ tensorboard==2.19.0
62
+ protobuf==5.29.3
63
+ gitdb==4.0.12
64
+ smmap==5.0.2
65
+ absl-py==2.1.0
66
+ tqdm==4.67.1
67
+ yarl==1.18.3
68
+ pycparser==2.22
69
+ nvidia-cusparse-cu12==12.3.1.170
70
+ attrs==25.1.0
71
+ lazy_loader==0.4
72
+ tensorboard-data-server==0.7.2
73
+ threadpoolctl==3.5.0
74
+ GitPython==3.1.44
75
+ safetensors==0.5.2
76
+ fsspec==2024.12.0
77
+ nvidia-cuda-nvrtc-cu12==12.4.127
78
+ filelock==3.17.0
79
+ aiohappyeyeballs==2.4.6
80
+ packaging==24.2
81
+ datasets==3.2.1.dev0
82
+ audioread==3.0.1
83
+ propcache==0.2.1
84
+ transformers==4.49.0.dev0
85
+ nvidia-cuda-runtime-cu12==12.4.127
86
+ cffi==1.17.1
87
+ evaluate==0.4.3
88
+ Werkzeug==3.1.3
89
+ huggingface-hub==0.28.1
90
+ Jinja2==3.1.5
91
+ torch==2.6.0
92
+ nvidia-curand-cu12==10.3.5.147
93
+ xxhash==3.5.0
94
+ platformdirs==4.3.6
95
+ multidict==6.1.0
96
+ nvidia-cublas-cu12==12.4.5.8
97
+ nvidia-nvjitlink-cu12==12.4.127
98
+ triton==3.2.0
99
+ numba==0.61.0
100
+ importlib_metadata==8.0.0
101
+ platformdirs==4.2.2
102
+ typeguard==4.3.0
103
+ more-itertools==10.3.0
104
+ tomli==2.0.1
105
+ autocommand==2.2.2
106
+ zipp==3.19.2
107
+ typing_extensions==4.12.2
108
+ backports.tarfile==1.2.0
109
+ inflect==7.3.1
110
+ jaraco.text==3.12.1
111
+ wheel==0.43.0
112
+ packaging==24.2
113
+ jaraco.collections==5.1.0
114
+ jaraco.functools==4.0.1
115
+ jaraco.context==5.3.0
wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.3",
4
+ "startedAt": "2025-02-12T12:28:54.528397Z",
5
+ "args": [
6
+ "--model_name_or_path=openai/whisper-small",
7
+ "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
+ "--language=basque",
9
+ "--train_split_name=train",
10
+ "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
11
+ "--model_index_name=Whisper Small Basque",
12
+ "--max_steps=8000",
13
+ "--output_dir=./",
14
+ "--per_device_train_batch_size=32",
15
+ "--per_device_eval_batch_size=16",
16
+ "--gradient_accumulation_steps=1",
17
+ "--logging_steps=25",
18
+ "--learning_rate=1e-5",
19
+ "--warmup_steps=500",
20
+ "--evaluation_strategy=steps",
21
+ "--eval_steps=1000",
22
+ "--save_strategy=steps",
23
+ "--save_steps=1000",
24
+ "--generation_max_length=225",
25
+ "--length_column_name=input_length",
26
+ "--max_duration_in_seconds=30",
27
+ "--text_column_name=sentence",
28
+ "--freeze_feature_encoder=False",
29
+ "--report_to=tensorboard",
30
+ "--metric_for_best_model=wer",
31
+ "--greater_is_better=False",
32
+ "--load_best_model_at_end",
33
+ "--gradient_checkpointing",
34
+ "--fp16",
35
+ "--overwrite_output_dir",
36
+ "--do_train",
37
+ "--do_eval",
38
+ "--predict_with_generate",
39
+ "--do_normalize_eval",
40
+ "--streaming",
41
+ "--use_auth_token",
42
+ "--push_to_hub",
43
+ "--report_to",
44
+ "wandb",
45
+ "--run_name",
46
+ "whisper-small-eu"
47
+ ],
48
+ "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
+ "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
+ "git": {
51
+ "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
+ "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
+ },
54
+ "email": "[email protected]",
55
+ "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
+ "host": "tknika",
57
+ "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
+ "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
+ "cpu_count": 8,
60
+ "cpu_count_logical": 8,
61
+ "gpu": "NVIDIA L40-48Q",
62
+ "gpu_count": 1,
63
+ "disk": {
64
+ "/": {
65
+ "total": "525987168256",
66
+ "used": "297346756608"
67
+ }
68
+ },
69
+ "memory": {
70
+ "total": "33654022144"
71
+ },
72
+ "cpu": {
73
+ "count": 8,
74
+ "countLogical": 8
75
+ },
76
+ "gpu_nvidia": [
77
+ {
78
+ "name": "NVIDIA L40-48Q",
79
+ "memoryTotal": "51539607552",
80
+ "cudaCores": 18176,
81
+ "architecture": "Ada"
82
+ }
83
+ ],
84
+ "cudaVersion": "12.4"
85
+ }
wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":0}}
wandb/run-20250212_122854-4m048f5s/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
+ {"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528}
3
+ {"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}}
4
+ {"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"}
5
+ {"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"}
6
+ {"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"}
7
+ {"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"}
8
+ {"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"}
9
+ {"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"}
11
+ {"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"}
12
+ {"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"}
13
+ {"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"}
14
+ {"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"}
wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"}
2
+ {"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"}
3
+ {"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"}
4
+ {"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"}
5
+ {"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"}
6
+ {"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"}
7
+ {"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"}
9
+ {"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"}
13
+ {"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"}
14
+ {"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"}
15
+ {"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"}
wandb/run-20250212_122854-4m048f5s/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528
3
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log
7
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
8
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():756] calling init triggers
9
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():789] starting backend
12
+ 2025-02-12 12:28:54,521 INFO MainThread:224528 [wandb_init.py:init():793] sending inform_init request
13
+ 2025-02-12 12:28:54,527 INFO MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-02-12 12:28:54,528 INFO MainThread:224528 [wandb_init.py:init():808] backend started and connected
15
+ 2025-02-12 12:28:54,530 INFO MainThread:224528 [wandb_init.py:init():901] updated telemetry
16
+ 2025-02-12 12:28:54,537 INFO MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
+ 2025-02-12 12:28:54,883 INFO MainThread:224528 [wandb_init.py:init():994] starting run threads in backend
18
+ 2025-02-12 12:28:54,988 INFO MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg
19
+ 2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
+ 2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
+ 2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed.
22
+ 2025-02-12 12:28:54,990 INFO MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process
23
+ 2025-02-12 12:28:54,991 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
+ 2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x71c5f6c57cb0>>
25
+ 2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
+ 2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb ADDED
Binary file (11.3 kB). View file
 
wandb/run-20250212_125202-c6xjc1gs/files/config.yaml ADDED
@@ -0,0 +1,512 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _attn_implementation_autoset:
2
+ value: true
3
+ _name_or_path:
4
+ value: openai/whisper-small
5
+ _wandb:
6
+ value:
7
+ cli_version: 0.19.6
8
+ m:
9
+ - "1": train/global_step
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ python_version: 3.12.3
14
+ t:
15
+ "1":
16
+ - 1
17
+ - 5
18
+ - 11
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 100
25
+ "2":
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 100
35
+ "3":
36
+ - 7
37
+ - 13
38
+ - 19
39
+ - 23
40
+ - 55
41
+ - 66
42
+ "4": 3.12.3
43
+ "5": 0.19.6
44
+ "6": 4.49.0.dev0
45
+ "8":
46
+ - 5
47
+ "9":
48
+ "1": transformers_trainer
49
+ "12": 0.19.6
50
+ "13": linux-x86_64
51
+ accelerator_config:
52
+ value:
53
+ dispatch_batches: null
54
+ even_batches: true
55
+ gradient_accumulation_kwargs: null
56
+ non_blocking: false
57
+ split_batches: false
58
+ use_seedable_sampler: true
59
+ activation_dropout:
60
+ value: 0
61
+ activation_function:
62
+ value: gelu
63
+ adafactor:
64
+ value: false
65
+ adam_beta1:
66
+ value: 0.9
67
+ adam_beta2:
68
+ value: 0.999
69
+ adam_epsilon:
70
+ value: 1e-08
71
+ add_cross_attention:
72
+ value: false
73
+ apply_spec_augment:
74
+ value: false
75
+ architectures:
76
+ value:
77
+ - WhisperForConditionalGeneration
78
+ attention_dropout:
79
+ value: 0
80
+ auto_find_batch_size:
81
+ value: false
82
+ average_tokens_across_devices:
83
+ value: false
84
+ bad_words_ids:
85
+ value: null
86
+ batch_eval_metrics:
87
+ value: false
88
+ begin_suppress_tokens:
89
+ value:
90
+ - 220
91
+ - 50257
92
+ bf16:
93
+ value: false
94
+ bf16_full_eval:
95
+ value: false
96
+ bos_token_id:
97
+ value: 50257
98
+ chunk_size_feed_forward:
99
+ value: 0
100
+ classifier_proj_size:
101
+ value: 256
102
+ cross_attention_hidden_size:
103
+ value: null
104
+ d_model:
105
+ value: 768
106
+ data_seed:
107
+ value: null
108
+ dataloader_drop_last:
109
+ value: false
110
+ dataloader_num_workers:
111
+ value: 0
112
+ dataloader_persistent_workers:
113
+ value: false
114
+ dataloader_pin_memory:
115
+ value: true
116
+ dataloader_prefetch_factor:
117
+ value: null
118
+ ddp_backend:
119
+ value: null
120
+ ddp_broadcast_buffers:
121
+ value: null
122
+ ddp_bucket_cap_mb:
123
+ value: null
124
+ ddp_find_unused_parameters:
125
+ value: null
126
+ ddp_timeout:
127
+ value: 1800
128
+ debug:
129
+ value: []
130
+ decoder_attention_heads:
131
+ value: 12
132
+ decoder_ffn_dim:
133
+ value: 3072
134
+ decoder_layerdrop:
135
+ value: 0
136
+ decoder_layers:
137
+ value: 12
138
+ decoder_start_token_id:
139
+ value: 50258
140
+ deepspeed:
141
+ value: null
142
+ disable_tqdm:
143
+ value: false
144
+ dispatch_batches:
145
+ value: null
146
+ diversity_penalty:
147
+ value: 0
148
+ do_eval:
149
+ value: true
150
+ do_predict:
151
+ value: false
152
+ do_sample:
153
+ value: false
154
+ do_train:
155
+ value: true
156
+ dropout:
157
+ value: 0
158
+ early_stopping:
159
+ value: false
160
+ encoder_attention_heads:
161
+ value: 12
162
+ encoder_ffn_dim:
163
+ value: 3072
164
+ encoder_layerdrop:
165
+ value: 0
166
+ encoder_layers:
167
+ value: 12
168
+ encoder_no_repeat_ngram_size:
169
+ value: 0
170
+ eos_token_id:
171
+ value: 50257
172
+ eval_accumulation_steps:
173
+ value: null
174
+ eval_delay:
175
+ value: 0
176
+ eval_do_concat_batches:
177
+ value: true
178
+ eval_on_start:
179
+ value: false
180
+ eval_steps:
181
+ value: 1000
182
+ eval_strategy:
183
+ value: steps
184
+ eval_use_gather_object:
185
+ value: false
186
+ evaluation_strategy:
187
+ value: steps
188
+ exponential_decay_length_penalty:
189
+ value: null
190
+ finetuning_task:
191
+ value: null
192
+ forced_bos_token_id:
193
+ value: null
194
+ forced_decoder_ids:
195
+ value: null
196
+ forced_eos_token_id:
197
+ value: null
198
+ fp16:
199
+ value: true
200
+ fp16_backend:
201
+ value: auto
202
+ fp16_full_eval:
203
+ value: false
204
+ fp16_opt_level:
205
+ value: O1
206
+ fsdp:
207
+ value: []
208
+ fsdp_config:
209
+ value:
210
+ min_num_params: 0
211
+ xla: false
212
+ xla_fsdp_grad_ckpt: false
213
+ xla_fsdp_v2: false
214
+ fsdp_min_num_params:
215
+ value: 0
216
+ fsdp_transformer_layer_cls_to_wrap:
217
+ value: null
218
+ full_determinism:
219
+ value: false
220
+ generation_config:
221
+ value: null
222
+ generation_max_length:
223
+ value: 225
224
+ generation_num_beams:
225
+ value: null
226
+ gradient_accumulation_steps:
227
+ value: 1
228
+ gradient_checkpointing:
229
+ value: true
230
+ gradient_checkpointing_kwargs:
231
+ value: null
232
+ greater_is_better:
233
+ value: false
234
+ group_by_length:
235
+ value: false
236
+ half_precision_backend:
237
+ value: auto
238
+ hub_always_push:
239
+ value: false
240
+ hub_model_id:
241
+ value: null
242
+ hub_private_repo:
243
+ value: null
244
+ hub_strategy:
245
+ value: every_save
246
+ hub_token:
247
+ value: <HUB_TOKEN>
248
+ id2label:
249
+ value:
250
+ "0": LABEL_0
251
+ "1": LABEL_1
252
+ ignore_data_skip:
253
+ value: false
254
+ include_for_metrics:
255
+ value: []
256
+ include_inputs_for_metrics:
257
+ value: false
258
+ include_num_input_tokens_seen:
259
+ value: false
260
+ include_tokens_per_second:
261
+ value: false
262
+ init_std:
263
+ value: 0.02
264
+ is_decoder:
265
+ value: false
266
+ is_encoder_decoder:
267
+ value: true
268
+ jit_mode_eval:
269
+ value: false
270
+ label_names:
271
+ value: null
272
+ label_smoothing_factor:
273
+ value: 0
274
+ label2id:
275
+ value:
276
+ LABEL_0: 0
277
+ LABEL_1: 1
278
+ learning_rate:
279
+ value: 1e-05
280
+ length_column_name:
281
+ value: input_length
282
+ length_penalty:
283
+ value: 1
284
+ load_best_model_at_end:
285
+ value: true
286
+ local_rank:
287
+ value: 0
288
+ log_level:
289
+ value: passive
290
+ log_level_replica:
291
+ value: warning
292
+ log_on_each_node:
293
+ value: true
294
+ logging_dir:
295
+ value: ./runs/Feb12_12-51-48_tknika
296
+ logging_first_step:
297
+ value: false
298
+ logging_nan_inf_filter:
299
+ value: true
300
+ logging_steps:
301
+ value: 25
302
+ logging_strategy:
303
+ value: steps
304
+ lr_scheduler_type:
305
+ value: linear
306
+ mask_feature_length:
307
+ value: 10
308
+ mask_feature_min_masks:
309
+ value: 0
310
+ mask_feature_prob:
311
+ value: 0
312
+ mask_time_length:
313
+ value: 10
314
+ mask_time_min_masks:
315
+ value: 2
316
+ mask_time_prob:
317
+ value: 0.05
318
+ max_grad_norm:
319
+ value: 1
320
+ max_length:
321
+ value: 448
322
+ max_source_positions:
323
+ value: 1500
324
+ max_steps:
325
+ value: 8000
326
+ max_target_positions:
327
+ value: 448
328
+ median_filter_width:
329
+ value: 7
330
+ metric_for_best_model:
331
+ value: wer
332
+ min_length:
333
+ value: 0
334
+ model/num_parameters:
335
+ value: 241734912
336
+ model_type:
337
+ value: whisper
338
+ mp_parameters:
339
+ value: ""
340
+ neftune_noise_alpha:
341
+ value: null
342
+ no_cuda:
343
+ value: false
344
+ no_repeat_ngram_size:
345
+ value: 0
346
+ num_beam_groups:
347
+ value: 1
348
+ num_beams:
349
+ value: 1
350
+ num_hidden_layers:
351
+ value: 12
352
+ num_mel_bins:
353
+ value: 80
354
+ num_return_sequences:
355
+ value: 1
356
+ num_train_epochs:
357
+ value: 3
358
+ optim:
359
+ value: adamw_torch
360
+ optim_args:
361
+ value: null
362
+ optim_target_modules:
363
+ value: null
364
+ output_attentions:
365
+ value: false
366
+ output_dir:
367
+ value: ./
368
+ output_hidden_states:
369
+ value: false
370
+ output_scores:
371
+ value: false
372
+ overwrite_output_dir:
373
+ value: true
374
+ pad_token_id:
375
+ value: 50257
376
+ past_index:
377
+ value: -1
378
+ per_device_eval_batch_size:
379
+ value: 16
380
+ per_device_train_batch_size:
381
+ value: 32
382
+ per_gpu_eval_batch_size:
383
+ value: null
384
+ per_gpu_train_batch_size:
385
+ value: null
386
+ predict_with_generate:
387
+ value: true
388
+ prediction_loss_only:
389
+ value: false
390
+ prefix:
391
+ value: null
392
+ problem_type:
393
+ value: null
394
+ push_to_hub:
395
+ value: true
396
+ push_to_hub_model_id:
397
+ value: null
398
+ push_to_hub_organization:
399
+ value: null
400
+ push_to_hub_token:
401
+ value: <PUSH_TO_HUB_TOKEN>
402
+ ray_scope:
403
+ value: last
404
+ remove_invalid_values:
405
+ value: false
406
+ remove_unused_columns:
407
+ value: true
408
+ repetition_penalty:
409
+ value: 1
410
+ report_to:
411
+ value:
412
+ - wandb
413
+ restore_callback_states_from_checkpoint:
414
+ value: false
415
+ resume_from_checkpoint:
416
+ value: null
417
+ return_dict:
418
+ value: true
419
+ return_dict_in_generate:
420
+ value: false
421
+ run_name:
422
+ value: whisper-small-eu
423
+ save_on_each_node:
424
+ value: false
425
+ save_only_model:
426
+ value: false
427
+ save_safetensors:
428
+ value: true
429
+ save_steps:
430
+ value: 1000
431
+ save_strategy:
432
+ value: steps
433
+ save_total_limit:
434
+ value: null
435
+ scale_embedding:
436
+ value: false
437
+ seed:
438
+ value: 42
439
+ sep_token_id:
440
+ value: null
441
+ skip_memory_metrics:
442
+ value: true
443
+ sortish_sampler:
444
+ value: false
445
+ split_batches:
446
+ value: null
447
+ suppress_tokens:
448
+ value: null
449
+ task_specific_params:
450
+ value: null
451
+ temperature:
452
+ value: 1
453
+ tf_legacy_loss:
454
+ value: false
455
+ tf32:
456
+ value: null
457
+ tie_encoder_decoder:
458
+ value: false
459
+ tie_word_embeddings:
460
+ value: true
461
+ tokenizer_class:
462
+ value: null
463
+ top_k:
464
+ value: 50
465
+ top_p:
466
+ value: 1
467
+ torch_compile:
468
+ value: false
469
+ torch_compile_backend:
470
+ value: null
471
+ torch_compile_mode:
472
+ value: null
473
+ torch_dtype:
474
+ value: float32
475
+ torch_empty_cache_steps:
476
+ value: null
477
+ torchdynamo:
478
+ value: null
479
+ torchscript:
480
+ value: false
481
+ tpu_metrics_debug:
482
+ value: false
483
+ tpu_num_cores:
484
+ value: null
485
+ transformers_version:
486
+ value: 4.49.0.dev0
487
+ typical_p:
488
+ value: 1
489
+ use_bfloat16:
490
+ value: false
491
+ use_cache:
492
+ value: false
493
+ use_cpu:
494
+ value: false
495
+ use_ipex:
496
+ value: false
497
+ use_legacy_prediction_loop:
498
+ value: false
499
+ use_liger_kernel:
500
+ value: false
501
+ use_mps_device:
502
+ value: false
503
+ use_weighted_layer_sum:
504
+ value: false
505
+ vocab_size:
506
+ value: 51865
507
+ warmup_ratio:
508
+ value: 0
509
+ warmup_steps:
510
+ value: 500
511
+ weight_decay:
512
+ value: 0
wandb/run-20250212_125202-c6xjc1gs/files/output.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
+ main()
4
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
+ return inner_training_loop(
9
+ ^^^^^^^^^^^^^^^^^^^^
10
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
+ self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
+ return self.call_event("on_epoch_begin", args, state, control)
15
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
+ result = getattr(callback, event)(
18
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
19
+ File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
+ if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
+ ^^^^^^^^^^^^^^^^^^^^^^^^
22
+ AttributeError: 'NoneType' object has no attribute 'dataset'
wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiosignal==1.3.2
2
+ Markdown==3.7
3
+ more-itertools==10.6.0
4
+ requests==2.32.3
5
+ sentry-sdk==2.21.0
6
+ torchaudio==2.6.0
7
+ charset-normalizer==3.4.1
8
+ docker-pycreds==0.4.0
9
+ nvidia-cusolver-cu12==11.6.1.9
10
+ PyYAML==6.0.2
11
+ librosa==0.10.2.post1
12
+ soxr==0.5.0.post1
13
+ multiprocess==0.70.16
14
+ setuptools==75.8.0
15
+ nvidia-cufft-cu12==11.2.1.3
16
+ joblib==1.4.2
17
+ pytz==2025.1
18
+ pip==24.0
19
+ scikit-learn==1.6.1
20
+ certifi==2025.1.31
21
+ jiwer==3.1.0
22
+ regex==2024.11.6
23
+ annotated-types==0.7.0
24
+ grpcio==1.70.0
25
+ msgpack==1.1.0
26
+ mpmath==1.3.0
27
+ nvidia-cudnn-cu12==9.1.0.70
28
+ soundfile==0.13.1
29
+ dill==0.3.8
30
+ nvidia-nvtx-cu12==12.4.127
31
+ six==1.17.0
32
+ nvidia-cuda-cupti-cu12==12.4.127
33
+ pyarrow==19.0.0
34
+ nvidia-nccl-cu12==2.21.5
35
+ psutil==6.1.1
36
+ decorator==5.1.1
37
+ llvmlite==0.44.0
38
+ frozenlist==1.5.0
39
+ pydantic==2.10.6
40
+ networkx==3.4.2
41
+ idna==3.10
42
+ wandb==0.19.6
43
+ aiohttp==3.11.12
44
+ RapidFuzz==3.12.1
45
+ pandas==2.2.3
46
+ python-dateutil==2.9.0.post0
47
+ numpy==2.1.3
48
+ tokenizers==0.21.0
49
+ nvidia-cusparselt-cu12==0.6.2
50
+ typing_extensions==4.12.2
51
+ urllib3==2.3.0
52
+ setproctitle==1.3.4
53
+ tzdata==2025.1
54
+ sympy==1.13.1
55
+ pooch==1.8.2
56
+ click==8.1.8
57
+ pydantic_core==2.27.2
58
+ MarkupSafe==3.0.2
59
+ scipy==1.15.1
60
+ accelerate==1.3.0
61
+ tensorboard==2.19.0
62
+ protobuf==5.29.3
63
+ gitdb==4.0.12
64
+ smmap==5.0.2
65
+ absl-py==2.1.0
66
+ tqdm==4.67.1
67
+ yarl==1.18.3
68
+ pycparser==2.22
69
+ nvidia-cusparse-cu12==12.3.1.170
70
+ attrs==25.1.0
71
+ lazy_loader==0.4
72
+ tensorboard-data-server==0.7.2
73
+ threadpoolctl==3.5.0
74
+ GitPython==3.1.44
75
+ safetensors==0.5.2
76
+ fsspec==2024.12.0
77
+ nvidia-cuda-nvrtc-cu12==12.4.127
78
+ filelock==3.17.0
79
+ aiohappyeyeballs==2.4.6
80
+ packaging==24.2
81
+ datasets==3.2.1.dev0
82
+ audioread==3.0.1
83
+ propcache==0.2.1
84
+ transformers==4.49.0.dev0
85
+ nvidia-cuda-runtime-cu12==12.4.127
86
+ cffi==1.17.1
87
+ evaluate==0.4.3
88
+ Werkzeug==3.1.3
89
+ huggingface-hub==0.28.1
90
+ Jinja2==3.1.5
91
+ torch==2.6.0
92
+ nvidia-curand-cu12==10.3.5.147
93
+ xxhash==3.5.0
94
+ platformdirs==4.3.6
95
+ multidict==6.1.0
96
+ nvidia-cublas-cu12==12.4.5.8
97
+ nvidia-nvjitlink-cu12==12.4.127
98
+ triton==3.2.0
99
+ numba==0.61.0
100
+ importlib_metadata==8.0.0
101
+ platformdirs==4.2.2
102
+ typeguard==4.3.0
103
+ more-itertools==10.3.0
104
+ tomli==2.0.1
105
+ autocommand==2.2.2
106
+ zipp==3.19.2
107
+ typing_extensions==4.12.2
108
+ backports.tarfile==1.2.0
109
+ inflect==7.3.1
110
+ jaraco.text==3.12.1
111
+ wheel==0.43.0
112
+ packaging==24.2
113
+ jaraco.collections==5.1.0
114
+ jaraco.functools==4.0.1
115
+ jaraco.context==5.3.0
wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.3",
4
+ "startedAt": "2025-02-12T12:52:03.105234Z",
5
+ "args": [
6
+ "--model_name_or_path=openai/whisper-small",
7
+ "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
+ "--language=basque",
9
+ "--train_split_name=train",
10
+ "--eval_split_name=test_parl",
11
+ "--model_index_name=Whisper Small Basque",
12
+ "--max_steps=8000",
13
+ "--output_dir=./",
14
+ "--per_device_train_batch_size=32",
15
+ "--per_device_eval_batch_size=16",
16
+ "--gradient_accumulation_steps=1",
17
+ "--logging_steps=25",
18
+ "--learning_rate=1e-5",
19
+ "--warmup_steps=500",
20
+ "--evaluation_strategy=steps",
21
+ "--eval_steps=1000",
22
+ "--save_strategy=steps",
23
+ "--save_steps=1000",
24
+ "--generation_max_length=225",
25
+ "--length_column_name=input_length",
26
+ "--max_duration_in_seconds=30",
27
+ "--text_column_name=sentence",
28
+ "--freeze_feature_encoder=False",
29
+ "--report_to=tensorboard",
30
+ "--metric_for_best_model=wer",
31
+ "--greater_is_better=False",
32
+ "--load_best_model_at_end",
33
+ "--gradient_checkpointing",
34
+ "--fp16",
35
+ "--overwrite_output_dir",
36
+ "--do_train",
37
+ "--do_eval",
38
+ "--predict_with_generate",
39
+ "--do_normalize_eval",
40
+ "--streaming",
41
+ "--use_auth_token",
42
+ "--push_to_hub",
43
+ "--report_to",
44
+ "wandb",
45
+ "--run_name",
46
+ "whisper-small-eu"
47
+ ],
48
+ "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
+ "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
+ "git": {
51
+ "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
+ "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
+ },
54
+ "email": "[email protected]",
55
+ "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
+ "host": "tknika",
57
+ "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
+ "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
+ "cpu_count": 8,
60
+ "cpu_count_logical": 8,
61
+ "gpu": "NVIDIA L40-48Q",
62
+ "gpu_count": 1,
63
+ "disk": {
64
+ "/": {
65
+ "total": "525987168256",
66
+ "used": "313777016832"
67
+ }
68
+ },
69
+ "memory": {
70
+ "total": "33654022144"
71
+ },
72
+ "cpu": {
73
+ "count": 8,
74
+ "countLogical": 8
75
+ },
76
+ "gpu_nvidia": [
77
+ {
78
+ "name": "NVIDIA L40-48Q",
79
+ "memoryTotal": "51539607552",
80
+ "cudaCores": 18176,
81
+ "architecture": "Ada"
82
+ }
83
+ ],
84
+ "cudaVersion": "12.4"
85
+ }
wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":0}}
wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
+ {"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112}
3
+ {"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}}
4
+ {"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"}
5
+ {"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
6
+ {"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
7
+ {"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"}
8
+ {"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"}
9
+ {"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"}
11
+ {"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"}
12
+ {"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"}
13
+ {"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"}
14
+ {"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"}
wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"}
2
+ {"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"}
3
+ {"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"}
4
+ {"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"}
5
+ {"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"}
6
+ {"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"}
7
+ {"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"}
9
+ {"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"}
13
+ {"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"}
14
+ {"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"}
15
+ {"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"}
wandb/run-20250212_125202-c6xjc1gs/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-02-12 12:52:02,886 INFO MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112
3
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
7
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
8
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():756] calling init triggers
9
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():789] starting backend
12
+ 2025-02-12 12:52:03,097 INFO MainThread:226112 [wandb_init.py:init():793] sending inform_init request
13
+ 2025-02-12 12:52:03,104 INFO MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-02-12 12:52:03,104 INFO MainThread:226112 [wandb_init.py:init():808] backend started and connected
15
+ 2025-02-12 12:52:03,107 INFO MainThread:226112 [wandb_init.py:init():901] updated telemetry
16
+ 2025-02-12 12:52:03,114 INFO MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
+ 2025-02-12 12:52:03,483 INFO MainThread:226112 [wandb_init.py:init():994] starting run threads in backend
18
+ 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg
19
+ 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
+ 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
+ 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed.
22
+ 2025-02-12 12:52:03,568 INFO MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process
23
+ 2025-02-12 12:52:03,569 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
+ 2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7d4830f2ddf0>>
25
+ 2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
+ 2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb ADDED
Binary file (11.3 kB). View file
 
wandb/run-20250212_125924-xhsgsxqq/files/config.yaml ADDED
@@ -0,0 +1,512 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _attn_implementation_autoset:
2
+ value: true
3
+ _name_or_path:
4
+ value: openai/whisper-small
5
+ _wandb:
6
+ value:
7
+ cli_version: 0.19.6
8
+ m:
9
+ - "1": train/global_step
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ python_version: 3.12.3
14
+ t:
15
+ "1":
16
+ - 1
17
+ - 5
18
+ - 11
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 100
25
+ "2":
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 100
35
+ "3":
36
+ - 7
37
+ - 13
38
+ - 19
39
+ - 23
40
+ - 55
41
+ - 66
42
+ "4": 3.12.3
43
+ "5": 0.19.6
44
+ "6": 4.49.0.dev0
45
+ "8":
46
+ - 5
47
+ "9":
48
+ "1": transformers_trainer
49
+ "12": 0.19.6
50
+ "13": linux-x86_64
51
+ accelerator_config:
52
+ value:
53
+ dispatch_batches: null
54
+ even_batches: true
55
+ gradient_accumulation_kwargs: null
56
+ non_blocking: false
57
+ split_batches: false
58
+ use_seedable_sampler: true
59
+ activation_dropout:
60
+ value: 0
61
+ activation_function:
62
+ value: gelu
63
+ adafactor:
64
+ value: false
65
+ adam_beta1:
66
+ value: 0.9
67
+ adam_beta2:
68
+ value: 0.999
69
+ adam_epsilon:
70
+ value: 1e-08
71
+ add_cross_attention:
72
+ value: false
73
+ apply_spec_augment:
74
+ value: false
75
+ architectures:
76
+ value:
77
+ - WhisperForConditionalGeneration
78
+ attention_dropout:
79
+ value: 0
80
+ auto_find_batch_size:
81
+ value: false
82
+ average_tokens_across_devices:
83
+ value: false
84
+ bad_words_ids:
85
+ value: null
86
+ batch_eval_metrics:
87
+ value: false
88
+ begin_suppress_tokens:
89
+ value:
90
+ - 220
91
+ - 50257
92
+ bf16:
93
+ value: false
94
+ bf16_full_eval:
95
+ value: false
96
+ bos_token_id:
97
+ value: 50257
98
+ chunk_size_feed_forward:
99
+ value: 0
100
+ classifier_proj_size:
101
+ value: 256
102
+ cross_attention_hidden_size:
103
+ value: null
104
+ d_model:
105
+ value: 768
106
+ data_seed:
107
+ value: null
108
+ dataloader_drop_last:
109
+ value: false
110
+ dataloader_num_workers:
111
+ value: 0
112
+ dataloader_persistent_workers:
113
+ value: false
114
+ dataloader_pin_memory:
115
+ value: true
116
+ dataloader_prefetch_factor:
117
+ value: null
118
+ ddp_backend:
119
+ value: null
120
+ ddp_broadcast_buffers:
121
+ value: null
122
+ ddp_bucket_cap_mb:
123
+ value: null
124
+ ddp_find_unused_parameters:
125
+ value: null
126
+ ddp_timeout:
127
+ value: 1800
128
+ debug:
129
+ value: []
130
+ decoder_attention_heads:
131
+ value: 12
132
+ decoder_ffn_dim:
133
+ value: 3072
134
+ decoder_layerdrop:
135
+ value: 0
136
+ decoder_layers:
137
+ value: 12
138
+ decoder_start_token_id:
139
+ value: 50258
140
+ deepspeed:
141
+ value: null
142
+ disable_tqdm:
143
+ value: false
144
+ dispatch_batches:
145
+ value: null
146
+ diversity_penalty:
147
+ value: 0
148
+ do_eval:
149
+ value: true
150
+ do_predict:
151
+ value: false
152
+ do_sample:
153
+ value: false
154
+ do_train:
155
+ value: true
156
+ dropout:
157
+ value: 0
158
+ early_stopping:
159
+ value: false
160
+ encoder_attention_heads:
161
+ value: 12
162
+ encoder_ffn_dim:
163
+ value: 3072
164
+ encoder_layerdrop:
165
+ value: 0
166
+ encoder_layers:
167
+ value: 12
168
+ encoder_no_repeat_ngram_size:
169
+ value: 0
170
+ eos_token_id:
171
+ value: 50257
172
+ eval_accumulation_steps:
173
+ value: null
174
+ eval_delay:
175
+ value: 0
176
+ eval_do_concat_batches:
177
+ value: true
178
+ eval_on_start:
179
+ value: false
180
+ eval_steps:
181
+ value: 1000
182
+ eval_strategy:
183
+ value: steps
184
+ eval_use_gather_object:
185
+ value: false
186
+ evaluation_strategy:
187
+ value: steps
188
+ exponential_decay_length_penalty:
189
+ value: null
190
+ finetuning_task:
191
+ value: null
192
+ forced_bos_token_id:
193
+ value: null
194
+ forced_decoder_ids:
195
+ value: null
196
+ forced_eos_token_id:
197
+ value: null
198
+ fp16:
199
+ value: true
200
+ fp16_backend:
201
+ value: auto
202
+ fp16_full_eval:
203
+ value: false
204
+ fp16_opt_level:
205
+ value: O1
206
+ fsdp:
207
+ value: []
208
+ fsdp_config:
209
+ value:
210
+ min_num_params: 0
211
+ xla: false
212
+ xla_fsdp_grad_ckpt: false
213
+ xla_fsdp_v2: false
214
+ fsdp_min_num_params:
215
+ value: 0
216
+ fsdp_transformer_layer_cls_to_wrap:
217
+ value: null
218
+ full_determinism:
219
+ value: false
220
+ generation_config:
221
+ value: null
222
+ generation_max_length:
223
+ value: 225
224
+ generation_num_beams:
225
+ value: null
226
+ gradient_accumulation_steps:
227
+ value: 1
228
+ gradient_checkpointing:
229
+ value: true
230
+ gradient_checkpointing_kwargs:
231
+ value: null
232
+ greater_is_better:
233
+ value: false
234
+ group_by_length:
235
+ value: false
236
+ half_precision_backend:
237
+ value: auto
238
+ hub_always_push:
239
+ value: false
240
+ hub_model_id:
241
+ value: null
242
+ hub_private_repo:
243
+ value: null
244
+ hub_strategy:
245
+ value: every_save
246
+ hub_token:
247
+ value: <HUB_TOKEN>
248
+ id2label:
249
+ value:
250
+ "0": LABEL_0
251
+ "1": LABEL_1
252
+ ignore_data_skip:
253
+ value: false
254
+ include_for_metrics:
255
+ value: []
256
+ include_inputs_for_metrics:
257
+ value: false
258
+ include_num_input_tokens_seen:
259
+ value: false
260
+ include_tokens_per_second:
261
+ value: false
262
+ init_std:
263
+ value: 0.02
264
+ is_decoder:
265
+ value: false
266
+ is_encoder_decoder:
267
+ value: true
268
+ jit_mode_eval:
269
+ value: false
270
+ label_names:
271
+ value: null
272
+ label_smoothing_factor:
273
+ value: 0
274
+ label2id:
275
+ value:
276
+ LABEL_0: 0
277
+ LABEL_1: 1
278
+ learning_rate:
279
+ value: 1e-05
280
+ length_column_name:
281
+ value: input_length
282
+ length_penalty:
283
+ value: 1
284
+ load_best_model_at_end:
285
+ value: true
286
+ local_rank:
287
+ value: 0
288
+ log_level:
289
+ value: passive
290
+ log_level_replica:
291
+ value: warning
292
+ log_on_each_node:
293
+ value: true
294
+ logging_dir:
295
+ value: ./runs/Feb12_12-58-59_tknika
296
+ logging_first_step:
297
+ value: false
298
+ logging_nan_inf_filter:
299
+ value: true
300
+ logging_steps:
301
+ value: 25
302
+ logging_strategy:
303
+ value: steps
304
+ lr_scheduler_type:
305
+ value: linear
306
+ mask_feature_length:
307
+ value: 10
308
+ mask_feature_min_masks:
309
+ value: 0
310
+ mask_feature_prob:
311
+ value: 0
312
+ mask_time_length:
313
+ value: 10
314
+ mask_time_min_masks:
315
+ value: 2
316
+ mask_time_prob:
317
+ value: 0.05
318
+ max_grad_norm:
319
+ value: 1
320
+ max_length:
321
+ value: 448
322
+ max_source_positions:
323
+ value: 1500
324
+ max_steps:
325
+ value: 8000
326
+ max_target_positions:
327
+ value: 448
328
+ median_filter_width:
329
+ value: 7
330
+ metric_for_best_model:
331
+ value: wer
332
+ min_length:
333
+ value: 0
334
+ model/num_parameters:
335
+ value: 241734912
336
+ model_type:
337
+ value: whisper
338
+ mp_parameters:
339
+ value: ""
340
+ neftune_noise_alpha:
341
+ value: null
342
+ no_cuda:
343
+ value: false
344
+ no_repeat_ngram_size:
345
+ value: 0
346
+ num_beam_groups:
347
+ value: 1
348
+ num_beams:
349
+ value: 1
350
+ num_hidden_layers:
351
+ value: 12
352
+ num_mel_bins:
353
+ value: 80
354
+ num_return_sequences:
355
+ value: 1
356
+ num_train_epochs:
357
+ value: 3
358
+ optim:
359
+ value: adamw_torch
360
+ optim_args:
361
+ value: null
362
+ optim_target_modules:
363
+ value: null
364
+ output_attentions:
365
+ value: false
366
+ output_dir:
367
+ value: ./
368
+ output_hidden_states:
369
+ value: false
370
+ output_scores:
371
+ value: false
372
+ overwrite_output_dir:
373
+ value: true
374
+ pad_token_id:
375
+ value: 50257
376
+ past_index:
377
+ value: -1
378
+ per_device_eval_batch_size:
379
+ value: 16
380
+ per_device_train_batch_size:
381
+ value: 32
382
+ per_gpu_eval_batch_size:
383
+ value: null
384
+ per_gpu_train_batch_size:
385
+ value: null
386
+ predict_with_generate:
387
+ value: true
388
+ prediction_loss_only:
389
+ value: false
390
+ prefix:
391
+ value: null
392
+ problem_type:
393
+ value: null
394
+ push_to_hub:
395
+ value: true
396
+ push_to_hub_model_id:
397
+ value: null
398
+ push_to_hub_organization:
399
+ value: null
400
+ push_to_hub_token:
401
+ value: <PUSH_TO_HUB_TOKEN>
402
+ ray_scope:
403
+ value: last
404
+ remove_invalid_values:
405
+ value: false
406
+ remove_unused_columns:
407
+ value: true
408
+ repetition_penalty:
409
+ value: 1
410
+ report_to:
411
+ value:
412
+ - wandb
413
+ restore_callback_states_from_checkpoint:
414
+ value: false
415
+ resume_from_checkpoint:
416
+ value: null
417
+ return_dict:
418
+ value: true
419
+ return_dict_in_generate:
420
+ value: false
421
+ run_name:
422
+ value: whisper-small-eu
423
+ save_on_each_node:
424
+ value: false
425
+ save_only_model:
426
+ value: false
427
+ save_safetensors:
428
+ value: true
429
+ save_steps:
430
+ value: 1000
431
+ save_strategy:
432
+ value: steps
433
+ save_total_limit:
434
+ value: null
435
+ scale_embedding:
436
+ value: false
437
+ seed:
438
+ value: 42
439
+ sep_token_id:
440
+ value: null
441
+ skip_memory_metrics:
442
+ value: true
443
+ sortish_sampler:
444
+ value: false
445
+ split_batches:
446
+ value: null
447
+ suppress_tokens:
448
+ value: null
449
+ task_specific_params:
450
+ value: null
451
+ temperature:
452
+ value: 1
453
+ tf_legacy_loss:
454
+ value: false
455
+ tf32:
456
+ value: null
457
+ tie_encoder_decoder:
458
+ value: false
459
+ tie_word_embeddings:
460
+ value: true
461
+ tokenizer_class:
462
+ value: null
463
+ top_k:
464
+ value: 50
465
+ top_p:
466
+ value: 1
467
+ torch_compile:
468
+ value: false
469
+ torch_compile_backend:
470
+ value: null
471
+ torch_compile_mode:
472
+ value: null
473
+ torch_dtype:
474
+ value: float32
475
+ torch_empty_cache_steps:
476
+ value: null
477
+ torchdynamo:
478
+ value: null
479
+ torchscript:
480
+ value: false
481
+ tpu_metrics_debug:
482
+ value: false
483
+ tpu_num_cores:
484
+ value: null
485
+ transformers_version:
486
+ value: 4.49.0.dev0
487
+ typical_p:
488
+ value: 1
489
+ use_bfloat16:
490
+ value: false
491
+ use_cache:
492
+ value: false
493
+ use_cpu:
494
+ value: false
495
+ use_ipex:
496
+ value: false
497
+ use_legacy_prediction_loop:
498
+ value: false
499
+ use_liger_kernel:
500
+ value: false
501
+ use_mps_device:
502
+ value: false
503
+ use_weighted_layer_sum:
504
+ value: false
505
+ vocab_size:
506
+ value: 51865
507
+ warmup_ratio:
508
+ value: 0
509
+ warmup_steps:
510
+ value: 500
511
+ weight_decay:
512
+ value: 0