Xabi Ezpeleta commited on
Commit
c9036e7
·
1 Parent(s): 70fb6a7

Ignore wandb logs

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitignore +1 -0
  2. wandb/debug-internal.log +0 -7
  3. wandb/debug.log +0 -25
  4. wandb/run-20250212_121751-d4i88lzt/files/config.yaml +0 -512
  5. wandb/run-20250212_121751-d4i88lzt/files/output.log +0 -22
  6. wandb/run-20250212_121751-d4i88lzt/files/requirements.txt +0 -115
  7. wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json +0 -85
  8. wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json +0 -1
  9. wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log +0 -14
  10. wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log +0 -15
  11. wandb/run-20250212_121751-d4i88lzt/logs/debug.log +0 -26
  12. wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb +0 -0
  13. wandb/run-20250212_122637-v3d3ouvn/files/config.yaml +0 -512
  14. wandb/run-20250212_122637-v3d3ouvn/files/output.log +0 -22
  15. wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt +0 -115
  16. wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json +0 -85
  17. wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json +0 -1
  18. wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log +0 -14
  19. wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log +0 -15
  20. wandb/run-20250212_122637-v3d3ouvn/logs/debug.log +0 -26
  21. wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb +0 -0
  22. wandb/run-20250212_122854-4m048f5s/files/config.yaml +0 -512
  23. wandb/run-20250212_122854-4m048f5s/files/output.log +0 -22
  24. wandb/run-20250212_122854-4m048f5s/files/requirements.txt +0 -115
  25. wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json +0 -85
  26. wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json +0 -1
  27. wandb/run-20250212_122854-4m048f5s/logs/debug-core.log +0 -14
  28. wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log +0 -15
  29. wandb/run-20250212_122854-4m048f5s/logs/debug.log +0 -26
  30. wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb +0 -0
  31. wandb/run-20250212_125202-c6xjc1gs/files/config.yaml +0 -512
  32. wandb/run-20250212_125202-c6xjc1gs/files/output.log +0 -22
  33. wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt +0 -115
  34. wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json +0 -85
  35. wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json +0 -1
  36. wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log +0 -14
  37. wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log +0 -15
  38. wandb/run-20250212_125202-c6xjc1gs/logs/debug.log +0 -26
  39. wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb +0 -0
  40. wandb/run-20250212_125924-xhsgsxqq/files/config.yaml +0 -512
  41. wandb/run-20250212_125924-xhsgsxqq/files/output.log +0 -22
  42. wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt +0 -115
  43. wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json +0 -85
  44. wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json +0 -1
  45. wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log +0 -14
  46. wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log +0 -15
  47. wandb/run-20250212_125924-xhsgsxqq/logs/debug.log +0 -26
  48. wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb +0 -0
  49. wandb/run-20250212_130533-zeu6vay4/files/config.yaml +0 -512
  50. wandb/run-20250212_130533-zeu6vay4/files/output.log +0 -22
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  checkpoint-*/
2
  nohup.out
3
  .venv/
 
 
1
  checkpoint-*/
2
  nohup.out
3
  .venv/
4
+ wandb/
wandb/debug-internal.log DELETED
@@ -1,7 +0,0 @@
1
- {"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
2
- {"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
3
- {"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
4
- {"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
5
- {"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
6
- {"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
7
- {"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
 
 
 
 
 
 
 
 
wandb/debug.log DELETED
@@ -1,25 +0,0 @@
1
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
3
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
7
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
8
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():756] calling init triggers
9
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
- config: {'_wandb': {}}
11
- 2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():789] starting backend
12
- 2025-02-12 15:27:10,107 INFO MainThread:243546 [wandb_init.py:init():793] sending inform_init request
13
- 2025-02-12 15:27:10,112 INFO MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-02-12 15:27:10,113 INFO MainThread:243546 [wandb_init.py:init():808] backend started and connected
15
- 2025-02-12 15:27:10,115 INFO MainThread:243546 [wandb_init.py:init():901] updated telemetry
16
- 2025-02-12 15:27:10,122 INFO MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
- 2025-02-12 15:27:10,584 INFO MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
18
- 2025-02-12 15:27:10,691 INFO MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
19
- 2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
- 2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
- 2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
22
- 2025-02-12 15:27:10,694 INFO MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
23
- 2025-02-12 15:27:10,698 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
- 2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
25
- 2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/files/config.yaml DELETED
@@ -1,512 +0,0 @@
1
- _attn_implementation_autoset:
2
- value: true
3
- _name_or_path:
4
- value: openai/whisper-small
5
- _wandb:
6
- value:
7
- cli_version: 0.19.6
8
- m:
9
- - "1": train/global_step
10
- "6":
11
- - 3
12
- "7": []
13
- python_version: 3.12.3
14
- t:
15
- "1":
16
- - 1
17
- - 5
18
- - 11
19
- - 49
20
- - 51
21
- - 53
22
- - 55
23
- - 71
24
- - 100
25
- "2":
26
- - 1
27
- - 5
28
- - 11
29
- - 49
30
- - 51
31
- - 53
32
- - 55
33
- - 71
34
- - 100
35
- "3":
36
- - 7
37
- - 13
38
- - 19
39
- - 23
40
- - 55
41
- - 66
42
- "4": 3.12.3
43
- "5": 0.19.6
44
- "6": 4.49.0.dev0
45
- "8":
46
- - 5
47
- "9":
48
- "1": transformers_trainer
49
- "12": 0.19.6
50
- "13": linux-x86_64
51
- accelerator_config:
52
- value:
53
- dispatch_batches: null
54
- even_batches: true
55
- gradient_accumulation_kwargs: null
56
- non_blocking: false
57
- split_batches: false
58
- use_seedable_sampler: true
59
- activation_dropout:
60
- value: 0
61
- activation_function:
62
- value: gelu
63
- adafactor:
64
- value: false
65
- adam_beta1:
66
- value: 0.9
67
- adam_beta2:
68
- value: 0.999
69
- adam_epsilon:
70
- value: 1e-08
71
- add_cross_attention:
72
- value: false
73
- apply_spec_augment:
74
- value: false
75
- architectures:
76
- value:
77
- - WhisperForConditionalGeneration
78
- attention_dropout:
79
- value: 0
80
- auto_find_batch_size:
81
- value: false
82
- average_tokens_across_devices:
83
- value: false
84
- bad_words_ids:
85
- value: null
86
- batch_eval_metrics:
87
- value: false
88
- begin_suppress_tokens:
89
- value:
90
- - 220
91
- - 50257
92
- bf16:
93
- value: false
94
- bf16_full_eval:
95
- value: false
96
- bos_token_id:
97
- value: 50257
98
- chunk_size_feed_forward:
99
- value: 0
100
- classifier_proj_size:
101
- value: 256
102
- cross_attention_hidden_size:
103
- value: null
104
- d_model:
105
- value: 768
106
- data_seed:
107
- value: null
108
- dataloader_drop_last:
109
- value: false
110
- dataloader_num_workers:
111
- value: 0
112
- dataloader_persistent_workers:
113
- value: false
114
- dataloader_pin_memory:
115
- value: true
116
- dataloader_prefetch_factor:
117
- value: null
118
- ddp_backend:
119
- value: null
120
- ddp_broadcast_buffers:
121
- value: null
122
- ddp_bucket_cap_mb:
123
- value: null
124
- ddp_find_unused_parameters:
125
- value: null
126
- ddp_timeout:
127
- value: 1800
128
- debug:
129
- value: []
130
- decoder_attention_heads:
131
- value: 12
132
- decoder_ffn_dim:
133
- value: 3072
134
- decoder_layerdrop:
135
- value: 0
136
- decoder_layers:
137
- value: 12
138
- decoder_start_token_id:
139
- value: 50258
140
- deepspeed:
141
- value: null
142
- disable_tqdm:
143
- value: false
144
- dispatch_batches:
145
- value: null
146
- diversity_penalty:
147
- value: 0
148
- do_eval:
149
- value: true
150
- do_predict:
151
- value: false
152
- do_sample:
153
- value: false
154
- do_train:
155
- value: true
156
- dropout:
157
- value: 0
158
- early_stopping:
159
- value: false
160
- encoder_attention_heads:
161
- value: 12
162
- encoder_ffn_dim:
163
- value: 3072
164
- encoder_layerdrop:
165
- value: 0
166
- encoder_layers:
167
- value: 12
168
- encoder_no_repeat_ngram_size:
169
- value: 0
170
- eos_token_id:
171
- value: 50257
172
- eval_accumulation_steps:
173
- value: null
174
- eval_delay:
175
- value: 0
176
- eval_do_concat_batches:
177
- value: true
178
- eval_on_start:
179
- value: false
180
- eval_steps:
181
- value: 1000
182
- eval_strategy:
183
- value: steps
184
- eval_use_gather_object:
185
- value: false
186
- evaluation_strategy:
187
- value: steps
188
- exponential_decay_length_penalty:
189
- value: null
190
- finetuning_task:
191
- value: null
192
- forced_bos_token_id:
193
- value: null
194
- forced_decoder_ids:
195
- value: null
196
- forced_eos_token_id:
197
- value: null
198
- fp16:
199
- value: true
200
- fp16_backend:
201
- value: auto
202
- fp16_full_eval:
203
- value: false
204
- fp16_opt_level:
205
- value: O1
206
- fsdp:
207
- value: []
208
- fsdp_config:
209
- value:
210
- min_num_params: 0
211
- xla: false
212
- xla_fsdp_grad_ckpt: false
213
- xla_fsdp_v2: false
214
- fsdp_min_num_params:
215
- value: 0
216
- fsdp_transformer_layer_cls_to_wrap:
217
- value: null
218
- full_determinism:
219
- value: false
220
- generation_config:
221
- value: null
222
- generation_max_length:
223
- value: 225
224
- generation_num_beams:
225
- value: null
226
- gradient_accumulation_steps:
227
- value: 1
228
- gradient_checkpointing:
229
- value: true
230
- gradient_checkpointing_kwargs:
231
- value: null
232
- greater_is_better:
233
- value: false
234
- group_by_length:
235
- value: false
236
- half_precision_backend:
237
- value: auto
238
- hub_always_push:
239
- value: false
240
- hub_model_id:
241
- value: null
242
- hub_private_repo:
243
- value: null
244
- hub_strategy:
245
- value: every_save
246
- hub_token:
247
- value: <HUB_TOKEN>
248
- id2label:
249
- value:
250
- "0": LABEL_0
251
- "1": LABEL_1
252
- ignore_data_skip:
253
- value: false
254
- include_for_metrics:
255
- value: []
256
- include_inputs_for_metrics:
257
- value: false
258
- include_num_input_tokens_seen:
259
- value: false
260
- include_tokens_per_second:
261
- value: false
262
- init_std:
263
- value: 0.02
264
- is_decoder:
265
- value: false
266
- is_encoder_decoder:
267
- value: true
268
- jit_mode_eval:
269
- value: false
270
- label_names:
271
- value: null
272
- label_smoothing_factor:
273
- value: 0
274
- label2id:
275
- value:
276
- LABEL_0: 0
277
- LABEL_1: 1
278
- learning_rate:
279
- value: 1e-05
280
- length_column_name:
281
- value: input_length
282
- length_penalty:
283
- value: 1
284
- load_best_model_at_end:
285
- value: true
286
- local_rank:
287
- value: 0
288
- log_level:
289
- value: passive
290
- log_level_replica:
291
- value: warning
292
- log_on_each_node:
293
- value: true
294
- logging_dir:
295
- value: ./runs/Feb12_12-17-27_tknika
296
- logging_first_step:
297
- value: false
298
- logging_nan_inf_filter:
299
- value: true
300
- logging_steps:
301
- value: 25
302
- logging_strategy:
303
- value: steps
304
- lr_scheduler_type:
305
- value: linear
306
- mask_feature_length:
307
- value: 10
308
- mask_feature_min_masks:
309
- value: 0
310
- mask_feature_prob:
311
- value: 0
312
- mask_time_length:
313
- value: 10
314
- mask_time_min_masks:
315
- value: 2
316
- mask_time_prob:
317
- value: 0.05
318
- max_grad_norm:
319
- value: 1
320
- max_length:
321
- value: 448
322
- max_source_positions:
323
- value: 1500
324
- max_steps:
325
- value: 8000
326
- max_target_positions:
327
- value: 448
328
- median_filter_width:
329
- value: 7
330
- metric_for_best_model:
331
- value: wer
332
- min_length:
333
- value: 0
334
- model/num_parameters:
335
- value: 241734912
336
- model_type:
337
- value: whisper
338
- mp_parameters:
339
- value: ""
340
- neftune_noise_alpha:
341
- value: null
342
- no_cuda:
343
- value: false
344
- no_repeat_ngram_size:
345
- value: 0
346
- num_beam_groups:
347
- value: 1
348
- num_beams:
349
- value: 1
350
- num_hidden_layers:
351
- value: 12
352
- num_mel_bins:
353
- value: 80
354
- num_return_sequences:
355
- value: 1
356
- num_train_epochs:
357
- value: 3
358
- optim:
359
- value: adamw_torch
360
- optim_args:
361
- value: null
362
- optim_target_modules:
363
- value: null
364
- output_attentions:
365
- value: false
366
- output_dir:
367
- value: ./
368
- output_hidden_states:
369
- value: false
370
- output_scores:
371
- value: false
372
- overwrite_output_dir:
373
- value: true
374
- pad_token_id:
375
- value: 50257
376
- past_index:
377
- value: -1
378
- per_device_eval_batch_size:
379
- value: 16
380
- per_device_train_batch_size:
381
- value: 32
382
- per_gpu_eval_batch_size:
383
- value: null
384
- per_gpu_train_batch_size:
385
- value: null
386
- predict_with_generate:
387
- value: true
388
- prediction_loss_only:
389
- value: false
390
- prefix:
391
- value: null
392
- problem_type:
393
- value: null
394
- push_to_hub:
395
- value: true
396
- push_to_hub_model_id:
397
- value: null
398
- push_to_hub_organization:
399
- value: null
400
- push_to_hub_token:
401
- value: <PUSH_TO_HUB_TOKEN>
402
- ray_scope:
403
- value: last
404
- remove_invalid_values:
405
- value: false
406
- remove_unused_columns:
407
- value: true
408
- repetition_penalty:
409
- value: 1
410
- report_to:
411
- value:
412
- - wandb
413
- restore_callback_states_from_checkpoint:
414
- value: false
415
- resume_from_checkpoint:
416
- value: null
417
- return_dict:
418
- value: true
419
- return_dict_in_generate:
420
- value: false
421
- run_name:
422
- value: whisper-small-eu
423
- save_on_each_node:
424
- value: false
425
- save_only_model:
426
- value: false
427
- save_safetensors:
428
- value: true
429
- save_steps:
430
- value: 1000
431
- save_strategy:
432
- value: steps
433
- save_total_limit:
434
- value: null
435
- scale_embedding:
436
- value: false
437
- seed:
438
- value: 42
439
- sep_token_id:
440
- value: null
441
- skip_memory_metrics:
442
- value: true
443
- sortish_sampler:
444
- value: false
445
- split_batches:
446
- value: null
447
- suppress_tokens:
448
- value: null
449
- task_specific_params:
450
- value: null
451
- temperature:
452
- value: 1
453
- tf_legacy_loss:
454
- value: false
455
- tf32:
456
- value: null
457
- tie_encoder_decoder:
458
- value: false
459
- tie_word_embeddings:
460
- value: true
461
- tokenizer_class:
462
- value: null
463
- top_k:
464
- value: 50
465
- top_p:
466
- value: 1
467
- torch_compile:
468
- value: false
469
- torch_compile_backend:
470
- value: null
471
- torch_compile_mode:
472
- value: null
473
- torch_dtype:
474
- value: float32
475
- torch_empty_cache_steps:
476
- value: null
477
- torchdynamo:
478
- value: null
479
- torchscript:
480
- value: false
481
- tpu_metrics_debug:
482
- value: false
483
- tpu_num_cores:
484
- value: null
485
- transformers_version:
486
- value: 4.49.0.dev0
487
- typical_p:
488
- value: 1
489
- use_bfloat16:
490
- value: false
491
- use_cache:
492
- value: false
493
- use_cpu:
494
- value: false
495
- use_ipex:
496
- value: false
497
- use_legacy_prediction_loop:
498
- value: false
499
- use_liger_kernel:
500
- value: false
501
- use_mps_device:
502
- value: false
503
- use_weighted_layer_sum:
504
- value: false
505
- vocab_size:
506
- value: 51865
507
- warmup_ratio:
508
- value: 0
509
- warmup_steps:
510
- value: 500
511
- weight_decay:
512
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/files/output.log DELETED
@@ -1,22 +0,0 @@
1
- 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
- main()
4
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
- train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
- return inner_training_loop(
9
- ^^^^^^^^^^^^^^^^^^^^
10
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
- self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
- return self.call_event("on_epoch_begin", args, state, control)
15
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
- result = getattr(callback, event)(
18
- ^^^^^^^^^^^^^^^^^^^^^^^^^
19
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
- if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
- ^^^^^^^^^^^^^^^^^^^^^^^^
22
- AttributeError: 'NoneType' object has no attribute 'dataset'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/files/requirements.txt DELETED
@@ -1,115 +0,0 @@
1
- aiosignal==1.3.2
2
- Markdown==3.7
3
- more-itertools==10.6.0
4
- requests==2.32.3
5
- sentry-sdk==2.21.0
6
- torchaudio==2.6.0
7
- charset-normalizer==3.4.1
8
- docker-pycreds==0.4.0
9
- nvidia-cusolver-cu12==11.6.1.9
10
- PyYAML==6.0.2
11
- librosa==0.10.2.post1
12
- soxr==0.5.0.post1
13
- multiprocess==0.70.16
14
- setuptools==75.8.0
15
- nvidia-cufft-cu12==11.2.1.3
16
- joblib==1.4.2
17
- pytz==2025.1
18
- pip==24.0
19
- scikit-learn==1.6.1
20
- certifi==2025.1.31
21
- jiwer==3.1.0
22
- regex==2024.11.6
23
- annotated-types==0.7.0
24
- grpcio==1.70.0
25
- msgpack==1.1.0
26
- mpmath==1.3.0
27
- nvidia-cudnn-cu12==9.1.0.70
28
- soundfile==0.13.1
29
- dill==0.3.8
30
- nvidia-nvtx-cu12==12.4.127
31
- six==1.17.0
32
- nvidia-cuda-cupti-cu12==12.4.127
33
- pyarrow==19.0.0
34
- nvidia-nccl-cu12==2.21.5
35
- psutil==6.1.1
36
- decorator==5.1.1
37
- llvmlite==0.44.0
38
- frozenlist==1.5.0
39
- pydantic==2.10.6
40
- networkx==3.4.2
41
- idna==3.10
42
- wandb==0.19.6
43
- aiohttp==3.11.12
44
- RapidFuzz==3.12.1
45
- pandas==2.2.3
46
- python-dateutil==2.9.0.post0
47
- numpy==2.1.3
48
- tokenizers==0.21.0
49
- nvidia-cusparselt-cu12==0.6.2
50
- typing_extensions==4.12.2
51
- urllib3==2.3.0
52
- setproctitle==1.3.4
53
- tzdata==2025.1
54
- sympy==1.13.1
55
- pooch==1.8.2
56
- click==8.1.8
57
- pydantic_core==2.27.2
58
- MarkupSafe==3.0.2
59
- scipy==1.15.1
60
- accelerate==1.3.0
61
- tensorboard==2.19.0
62
- protobuf==5.29.3
63
- gitdb==4.0.12
64
- smmap==5.0.2
65
- absl-py==2.1.0
66
- tqdm==4.67.1
67
- yarl==1.18.3
68
- pycparser==2.22
69
- nvidia-cusparse-cu12==12.3.1.170
70
- attrs==25.1.0
71
- lazy_loader==0.4
72
- tensorboard-data-server==0.7.2
73
- threadpoolctl==3.5.0
74
- GitPython==3.1.44
75
- safetensors==0.5.2
76
- fsspec==2024.12.0
77
- nvidia-cuda-nvrtc-cu12==12.4.127
78
- filelock==3.17.0
79
- aiohappyeyeballs==2.4.6
80
- packaging==24.2
81
- datasets==3.2.1.dev0
82
- audioread==3.0.1
83
- propcache==0.2.1
84
- transformers==4.49.0.dev0
85
- nvidia-cuda-runtime-cu12==12.4.127
86
- cffi==1.17.1
87
- evaluate==0.4.3
88
- Werkzeug==3.1.3
89
- huggingface-hub==0.28.1
90
- Jinja2==3.1.5
91
- torch==2.6.0
92
- nvidia-curand-cu12==10.3.5.147
93
- xxhash==3.5.0
94
- platformdirs==4.3.6
95
- multidict==6.1.0
96
- nvidia-cublas-cu12==12.4.5.8
97
- nvidia-nvjitlink-cu12==12.4.127
98
- triton==3.2.0
99
- numba==0.61.0
100
- importlib_metadata==8.0.0
101
- platformdirs==4.2.2
102
- typeguard==4.3.0
103
- more-itertools==10.3.0
104
- tomli==2.0.1
105
- autocommand==2.2.2
106
- zipp==3.19.2
107
- typing_extensions==4.12.2
108
- backports.tarfile==1.2.0
109
- inflect==7.3.1
110
- jaraco.text==3.12.1
111
- wheel==0.43.0
112
- packaging==24.2
113
- jaraco.collections==5.1.0
114
- jaraco.functools==4.0.1
115
- jaraco.context==5.3.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
- "python": "CPython 3.12.3",
4
- "startedAt": "2025-02-12T12:17:51.527114Z",
5
- "args": [
6
- "--model_name_or_path=openai/whisper-small",
7
- "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
- "--language=basque",
9
- "--train_split_name=train",
10
- "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
11
- "--model_index_name=Whisper Small Basque",
12
- "--max_steps=8000",
13
- "--output_dir=./",
14
- "--per_device_train_batch_size=32",
15
- "--per_device_eval_batch_size=16",
16
- "--gradient_accumulation_steps=1",
17
- "--logging_steps=25",
18
- "--learning_rate=1e-5",
19
- "--warmup_steps=500",
20
- "--evaluation_strategy=steps",
21
- "--eval_steps=1000",
22
- "--save_strategy=steps",
23
- "--save_steps=1000",
24
- "--generation_max_length=225",
25
- "--length_column_name=input_length",
26
- "--max_duration_in_seconds=30",
27
- "--text_column_name=sentence",
28
- "--freeze_feature_encoder=False",
29
- "--report_to=tensorboard",
30
- "--metric_for_best_model=wer",
31
- "--greater_is_better=False",
32
- "--load_best_model_at_end",
33
- "--gradient_checkpointing",
34
- "--fp16",
35
- "--overwrite_output_dir",
36
- "--do_train",
37
- "--do_eval",
38
- "--predict_with_generate",
39
- "--do_normalize_eval",
40
- "--streaming",
41
- "--use_auth_token",
42
- "--push_to_hub",
43
- "--report_to",
44
- "wandb",
45
- "--run_name",
46
- "whisper-small-eu"
47
- ],
48
- "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
- "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
- "git": {
51
- "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
- "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
- },
54
- "email": "[email protected]",
55
- "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
- "host": "tknika",
57
- "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
- "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
- "cpu_count": 8,
60
- "cpu_count_logical": 8,
61
- "gpu": "NVIDIA L40-48Q",
62
- "gpu_count": 1,
63
- "disk": {
64
- "/": {
65
- "total": "525987168256",
66
- "used": "297346564096"
67
- }
68
- },
69
- "memory": {
70
- "total": "33654022144"
71
- },
72
- "cpu": {
73
- "count": 8,
74
- "countLogical": 8
75
- },
76
- "gpu_nvidia": [
77
- {
78
- "name": "NVIDIA L40-48Q",
79
- "memoryTotal": "51539607552",
80
- "cudaCores": 18176,
81
- "architecture": "Ada"
82
- }
83
- ],
84
- "cudaVersion": "12.4"
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":0}}
 
 
wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
- {"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392}
3
- {"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}}
4
- {"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"}
5
- {"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
6
- {"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
7
- {"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"}
8
- {"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"}
9
- {"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"}
10
- {"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"}
11
- {"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"}
12
- {"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"}
13
- {"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"}
14
- {"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log DELETED
@@ -1,15 +0,0 @@
1
- {"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"}
2
- {"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"}
3
- {"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"}
4
- {"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"}
5
- {"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"}
6
- {"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"}
7
- {"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"}
8
- {"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"}
9
- {"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"}
10
- {"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"}
11
- {"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
- {"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"}
13
- {"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"}
14
- {"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"}
15
- {"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392
3
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
7
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
8
- 2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:init():756] calling init triggers
9
- 2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
- config: {'_wandb': {}}
11
- 2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():789] starting backend
12
- 2025-02-12 12:17:51,521 INFO MainThread:223392 [wandb_init.py:init():793] sending inform_init request
13
- 2025-02-12 12:17:51,526 INFO MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-02-12 12:17:51,526 INFO MainThread:223392 [wandb_init.py:init():808] backend started and connected
15
- 2025-02-12 12:17:51,528 INFO MainThread:223392 [wandb_init.py:init():901] updated telemetry
16
- 2025-02-12 12:17:51,535 INFO MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
- 2025-02-12 12:17:51,944 INFO MainThread:223392 [wandb_init.py:init():994] starting run threads in backend
18
- 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg
19
- 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
- 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
- 2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed.
22
- 2025-02-12 12:17:52,051 INFO MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process
23
- 2025-02-12 12:17:52,052 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
- 2025-02-12 12:17:52,054 INFO MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75ef87e92c00>>
25
- 2025-02-12 12:17:52,055 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
- 2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb DELETED
Binary file (11.3 kB)
 
wandb/run-20250212_122637-v3d3ouvn/files/config.yaml DELETED
@@ -1,512 +0,0 @@
1
- _attn_implementation_autoset:
2
- value: true
3
- _name_or_path:
4
- value: openai/whisper-small
5
- _wandb:
6
- value:
7
- cli_version: 0.19.6
8
- m:
9
- - "1": train/global_step
10
- "6":
11
- - 3
12
- "7": []
13
- python_version: 3.12.3
14
- t:
15
- "1":
16
- - 1
17
- - 5
18
- - 11
19
- - 49
20
- - 51
21
- - 53
22
- - 55
23
- - 71
24
- - 100
25
- "2":
26
- - 1
27
- - 5
28
- - 11
29
- - 49
30
- - 51
31
- - 53
32
- - 55
33
- - 71
34
- - 100
35
- "3":
36
- - 7
37
- - 13
38
- - 19
39
- - 23
40
- - 55
41
- - 66
42
- "4": 3.12.3
43
- "5": 0.19.6
44
- "6": 4.49.0.dev0
45
- "8":
46
- - 5
47
- "9":
48
- "1": transformers_trainer
49
- "12": 0.19.6
50
- "13": linux-x86_64
51
- accelerator_config:
52
- value:
53
- dispatch_batches: null
54
- even_batches: true
55
- gradient_accumulation_kwargs: null
56
- non_blocking: false
57
- split_batches: false
58
- use_seedable_sampler: true
59
- activation_dropout:
60
- value: 0
61
- activation_function:
62
- value: gelu
63
- adafactor:
64
- value: false
65
- adam_beta1:
66
- value: 0.9
67
- adam_beta2:
68
- value: 0.999
69
- adam_epsilon:
70
- value: 1e-08
71
- add_cross_attention:
72
- value: false
73
- apply_spec_augment:
74
- value: false
75
- architectures:
76
- value:
77
- - WhisperForConditionalGeneration
78
- attention_dropout:
79
- value: 0
80
- auto_find_batch_size:
81
- value: false
82
- average_tokens_across_devices:
83
- value: false
84
- bad_words_ids:
85
- value: null
86
- batch_eval_metrics:
87
- value: false
88
- begin_suppress_tokens:
89
- value:
90
- - 220
91
- - 50257
92
- bf16:
93
- value: false
94
- bf16_full_eval:
95
- value: false
96
- bos_token_id:
97
- value: 50257
98
- chunk_size_feed_forward:
99
- value: 0
100
- classifier_proj_size:
101
- value: 256
102
- cross_attention_hidden_size:
103
- value: null
104
- d_model:
105
- value: 768
106
- data_seed:
107
- value: null
108
- dataloader_drop_last:
109
- value: false
110
- dataloader_num_workers:
111
- value: 0
112
- dataloader_persistent_workers:
113
- value: false
114
- dataloader_pin_memory:
115
- value: true
116
- dataloader_prefetch_factor:
117
- value: null
118
- ddp_backend:
119
- value: null
120
- ddp_broadcast_buffers:
121
- value: null
122
- ddp_bucket_cap_mb:
123
- value: null
124
- ddp_find_unused_parameters:
125
- value: null
126
- ddp_timeout:
127
- value: 1800
128
- debug:
129
- value: []
130
- decoder_attention_heads:
131
- value: 12
132
- decoder_ffn_dim:
133
- value: 3072
134
- decoder_layerdrop:
135
- value: 0
136
- decoder_layers:
137
- value: 12
138
- decoder_start_token_id:
139
- value: 50258
140
- deepspeed:
141
- value: null
142
- disable_tqdm:
143
- value: false
144
- dispatch_batches:
145
- value: null
146
- diversity_penalty:
147
- value: 0
148
- do_eval:
149
- value: true
150
- do_predict:
151
- value: false
152
- do_sample:
153
- value: false
154
- do_train:
155
- value: true
156
- dropout:
157
- value: 0
158
- early_stopping:
159
- value: false
160
- encoder_attention_heads:
161
- value: 12
162
- encoder_ffn_dim:
163
- value: 3072
164
- encoder_layerdrop:
165
- value: 0
166
- encoder_layers:
167
- value: 12
168
- encoder_no_repeat_ngram_size:
169
- value: 0
170
- eos_token_id:
171
- value: 50257
172
- eval_accumulation_steps:
173
- value: null
174
- eval_delay:
175
- value: 0
176
- eval_do_concat_batches:
177
- value: true
178
- eval_on_start:
179
- value: false
180
- eval_steps:
181
- value: 1000
182
- eval_strategy:
183
- value: steps
184
- eval_use_gather_object:
185
- value: false
186
- evaluation_strategy:
187
- value: steps
188
- exponential_decay_length_penalty:
189
- value: null
190
- finetuning_task:
191
- value: null
192
- forced_bos_token_id:
193
- value: null
194
- forced_decoder_ids:
195
- value: null
196
- forced_eos_token_id:
197
- value: null
198
- fp16:
199
- value: true
200
- fp16_backend:
201
- value: auto
202
- fp16_full_eval:
203
- value: false
204
- fp16_opt_level:
205
- value: O1
206
- fsdp:
207
- value: []
208
- fsdp_config:
209
- value:
210
- min_num_params: 0
211
- xla: false
212
- xla_fsdp_grad_ckpt: false
213
- xla_fsdp_v2: false
214
- fsdp_min_num_params:
215
- value: 0
216
- fsdp_transformer_layer_cls_to_wrap:
217
- value: null
218
- full_determinism:
219
- value: false
220
- generation_config:
221
- value: null
222
- generation_max_length:
223
- value: 225
224
- generation_num_beams:
225
- value: null
226
- gradient_accumulation_steps:
227
- value: 1
228
- gradient_checkpointing:
229
- value: true
230
- gradient_checkpointing_kwargs:
231
- value: null
232
- greater_is_better:
233
- value: false
234
- group_by_length:
235
- value: false
236
- half_precision_backend:
237
- value: auto
238
- hub_always_push:
239
- value: false
240
- hub_model_id:
241
- value: null
242
- hub_private_repo:
243
- value: null
244
- hub_strategy:
245
- value: every_save
246
- hub_token:
247
- value: <HUB_TOKEN>
248
- id2label:
249
- value:
250
- "0": LABEL_0
251
- "1": LABEL_1
252
- ignore_data_skip:
253
- value: false
254
- include_for_metrics:
255
- value: []
256
- include_inputs_for_metrics:
257
- value: false
258
- include_num_input_tokens_seen:
259
- value: false
260
- include_tokens_per_second:
261
- value: false
262
- init_std:
263
- value: 0.02
264
- is_decoder:
265
- value: false
266
- is_encoder_decoder:
267
- value: true
268
- jit_mode_eval:
269
- value: false
270
- label_names:
271
- value: null
272
- label_smoothing_factor:
273
- value: 0
274
- label2id:
275
- value:
276
- LABEL_0: 0
277
- LABEL_1: 1
278
- learning_rate:
279
- value: 1e-05
280
- length_column_name:
281
- value: input_length
282
- length_penalty:
283
- value: 1
284
- load_best_model_at_end:
285
- value: true
286
- local_rank:
287
- value: 0
288
- log_level:
289
- value: passive
290
- log_level_replica:
291
- value: warning
292
- log_on_each_node:
293
- value: true
294
- logging_dir:
295
- value: ./runs/Feb12_12-26-11_tknika
296
- logging_first_step:
297
- value: false
298
- logging_nan_inf_filter:
299
- value: true
300
- logging_steps:
301
- value: 25
302
- logging_strategy:
303
- value: steps
304
- lr_scheduler_type:
305
- value: linear
306
- mask_feature_length:
307
- value: 10
308
- mask_feature_min_masks:
309
- value: 0
310
- mask_feature_prob:
311
- value: 0
312
- mask_time_length:
313
- value: 10
314
- mask_time_min_masks:
315
- value: 2
316
- mask_time_prob:
317
- value: 0.05
318
- max_grad_norm:
319
- value: 1
320
- max_length:
321
- value: 448
322
- max_source_positions:
323
- value: 1500
324
- max_steps:
325
- value: 8000
326
- max_target_positions:
327
- value: 448
328
- median_filter_width:
329
- value: 7
330
- metric_for_best_model:
331
- value: wer
332
- min_length:
333
- value: 0
334
- model/num_parameters:
335
- value: 241734912
336
- model_type:
337
- value: whisper
338
- mp_parameters:
339
- value: ""
340
- neftune_noise_alpha:
341
- value: null
342
- no_cuda:
343
- value: false
344
- no_repeat_ngram_size:
345
- value: 0
346
- num_beam_groups:
347
- value: 1
348
- num_beams:
349
- value: 1
350
- num_hidden_layers:
351
- value: 12
352
- num_mel_bins:
353
- value: 80
354
- num_return_sequences:
355
- value: 1
356
- num_train_epochs:
357
- value: 3
358
- optim:
359
- value: adamw_torch
360
- optim_args:
361
- value: null
362
- optim_target_modules:
363
- value: null
364
- output_attentions:
365
- value: false
366
- output_dir:
367
- value: ./
368
- output_hidden_states:
369
- value: false
370
- output_scores:
371
- value: false
372
- overwrite_output_dir:
373
- value: true
374
- pad_token_id:
375
- value: 50257
376
- past_index:
377
- value: -1
378
- per_device_eval_batch_size:
379
- value: 16
380
- per_device_train_batch_size:
381
- value: 32
382
- per_gpu_eval_batch_size:
383
- value: null
384
- per_gpu_train_batch_size:
385
- value: null
386
- predict_with_generate:
387
- value: true
388
- prediction_loss_only:
389
- value: false
390
- prefix:
391
- value: null
392
- problem_type:
393
- value: null
394
- push_to_hub:
395
- value: true
396
- push_to_hub_model_id:
397
- value: null
398
- push_to_hub_organization:
399
- value: null
400
- push_to_hub_token:
401
- value: <PUSH_TO_HUB_TOKEN>
402
- ray_scope:
403
- value: last
404
- remove_invalid_values:
405
- value: false
406
- remove_unused_columns:
407
- value: true
408
- repetition_penalty:
409
- value: 1
410
- report_to:
411
- value:
412
- - wandb
413
- restore_callback_states_from_checkpoint:
414
- value: false
415
- resume_from_checkpoint:
416
- value: null
417
- return_dict:
418
- value: true
419
- return_dict_in_generate:
420
- value: false
421
- run_name:
422
- value: whisper-small-eu
423
- save_on_each_node:
424
- value: false
425
- save_only_model:
426
- value: false
427
- save_safetensors:
428
- value: true
429
- save_steps:
430
- value: 1000
431
- save_strategy:
432
- value: steps
433
- save_total_limit:
434
- value: null
435
- scale_embedding:
436
- value: false
437
- seed:
438
- value: 42
439
- sep_token_id:
440
- value: null
441
- skip_memory_metrics:
442
- value: true
443
- sortish_sampler:
444
- value: false
445
- split_batches:
446
- value: null
447
- suppress_tokens:
448
- value: null
449
- task_specific_params:
450
- value: null
451
- temperature:
452
- value: 1
453
- tf_legacy_loss:
454
- value: false
455
- tf32:
456
- value: null
457
- tie_encoder_decoder:
458
- value: false
459
- tie_word_embeddings:
460
- value: true
461
- tokenizer_class:
462
- value: null
463
- top_k:
464
- value: 50
465
- top_p:
466
- value: 1
467
- torch_compile:
468
- value: false
469
- torch_compile_backend:
470
- value: null
471
- torch_compile_mode:
472
- value: null
473
- torch_dtype:
474
- value: float32
475
- torch_empty_cache_steps:
476
- value: null
477
- torchdynamo:
478
- value: null
479
- torchscript:
480
- value: false
481
- tpu_metrics_debug:
482
- value: false
483
- tpu_num_cores:
484
- value: null
485
- transformers_version:
486
- value: 4.49.0.dev0
487
- typical_p:
488
- value: 1
489
- use_bfloat16:
490
- value: false
491
- use_cache:
492
- value: false
493
- use_cpu:
494
- value: false
495
- use_ipex:
496
- value: false
497
- use_legacy_prediction_loop:
498
- value: false
499
- use_liger_kernel:
500
- value: false
501
- use_mps_device:
502
- value: false
503
- use_weighted_layer_sum:
504
- value: false
505
- vocab_size:
506
- value: 51865
507
- warmup_ratio:
508
- value: 0
509
- warmup_steps:
510
- value: 500
511
- weight_decay:
512
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122637-v3d3ouvn/files/output.log DELETED
@@ -1,22 +0,0 @@
1
- 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
- main()
4
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
- train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
- return inner_training_loop(
9
- ^^^^^^^^^^^^^^^^^^^^
10
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
- self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
- return self.call_event("on_epoch_begin", args, state, control)
15
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
- result = getattr(callback, event)(
18
- ^^^^^^^^^^^^^^^^^^^^^^^^^
19
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
- if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
- ^^^^^^^^^^^^^^^^^^^^^^^^
22
- AttributeError: 'NoneType' object has no attribute 'dataset'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt DELETED
@@ -1,115 +0,0 @@
1
- aiosignal==1.3.2
2
- Markdown==3.7
3
- more-itertools==10.6.0
4
- requests==2.32.3
5
- sentry-sdk==2.21.0
6
- torchaudio==2.6.0
7
- charset-normalizer==3.4.1
8
- docker-pycreds==0.4.0
9
- nvidia-cusolver-cu12==11.6.1.9
10
- PyYAML==6.0.2
11
- librosa==0.10.2.post1
12
- soxr==0.5.0.post1
13
- multiprocess==0.70.16
14
- setuptools==75.8.0
15
- nvidia-cufft-cu12==11.2.1.3
16
- joblib==1.4.2
17
- pytz==2025.1
18
- pip==24.0
19
- scikit-learn==1.6.1
20
- certifi==2025.1.31
21
- jiwer==3.1.0
22
- regex==2024.11.6
23
- annotated-types==0.7.0
24
- grpcio==1.70.0
25
- msgpack==1.1.0
26
- mpmath==1.3.0
27
- nvidia-cudnn-cu12==9.1.0.70
28
- soundfile==0.13.1
29
- dill==0.3.8
30
- nvidia-nvtx-cu12==12.4.127
31
- six==1.17.0
32
- nvidia-cuda-cupti-cu12==12.4.127
33
- pyarrow==19.0.0
34
- nvidia-nccl-cu12==2.21.5
35
- psutil==6.1.1
36
- decorator==5.1.1
37
- llvmlite==0.44.0
38
- frozenlist==1.5.0
39
- pydantic==2.10.6
40
- networkx==3.4.2
41
- idna==3.10
42
- wandb==0.19.6
43
- aiohttp==3.11.12
44
- RapidFuzz==3.12.1
45
- pandas==2.2.3
46
- python-dateutil==2.9.0.post0
47
- numpy==2.1.3
48
- tokenizers==0.21.0
49
- nvidia-cusparselt-cu12==0.6.2
50
- typing_extensions==4.12.2
51
- urllib3==2.3.0
52
- setproctitle==1.3.4
53
- tzdata==2025.1
54
- sympy==1.13.1
55
- pooch==1.8.2
56
- click==8.1.8
57
- pydantic_core==2.27.2
58
- MarkupSafe==3.0.2
59
- scipy==1.15.1
60
- accelerate==1.3.0
61
- tensorboard==2.19.0
62
- protobuf==5.29.3
63
- gitdb==4.0.12
64
- smmap==5.0.2
65
- absl-py==2.1.0
66
- tqdm==4.67.1
67
- yarl==1.18.3
68
- pycparser==2.22
69
- nvidia-cusparse-cu12==12.3.1.170
70
- attrs==25.1.0
71
- lazy_loader==0.4
72
- tensorboard-data-server==0.7.2
73
- threadpoolctl==3.5.0
74
- GitPython==3.1.44
75
- safetensors==0.5.2
76
- fsspec==2024.12.0
77
- nvidia-cuda-nvrtc-cu12==12.4.127
78
- filelock==3.17.0
79
- aiohappyeyeballs==2.4.6
80
- packaging==24.2
81
- datasets==3.2.1.dev0
82
- audioread==3.0.1
83
- propcache==0.2.1
84
- transformers==4.49.0.dev0
85
- nvidia-cuda-runtime-cu12==12.4.127
86
- cffi==1.17.1
87
- evaluate==0.4.3
88
- Werkzeug==3.1.3
89
- huggingface-hub==0.28.1
90
- Jinja2==3.1.5
91
- torch==2.6.0
92
- nvidia-curand-cu12==10.3.5.147
93
- xxhash==3.5.0
94
- platformdirs==4.3.6
95
- multidict==6.1.0
96
- nvidia-cublas-cu12==12.4.5.8
97
- nvidia-nvjitlink-cu12==12.4.127
98
- triton==3.2.0
99
- numba==0.61.0
100
- importlib_metadata==8.0.0
101
- platformdirs==4.2.2
102
- typeguard==4.3.0
103
- more-itertools==10.3.0
104
- tomli==2.0.1
105
- autocommand==2.2.2
106
- zipp==3.19.2
107
- typing_extensions==4.12.2
108
- backports.tarfile==1.2.0
109
- inflect==7.3.1
110
- jaraco.text==3.12.1
111
- wheel==0.43.0
112
- packaging==24.2
113
- jaraco.collections==5.1.0
114
- jaraco.functools==4.0.1
115
- jaraco.context==5.3.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
- "python": "CPython 3.12.3",
4
- "startedAt": "2025-02-12T12:26:37.277902Z",
5
- "args": [
6
- "--model_name_or_path=openai/whisper-small",
7
- "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
- "--language=basque",
9
- "--train_split_name=train",
10
- "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
11
- "--model_index_name=Whisper Small Basque",
12
- "--max_steps=8000",
13
- "--output_dir=./",
14
- "--per_device_train_batch_size=32",
15
- "--per_device_eval_batch_size=16",
16
- "--gradient_accumulation_steps=1",
17
- "--logging_steps=25",
18
- "--learning_rate=1e-5",
19
- "--warmup_steps=500",
20
- "--evaluation_strategy=steps",
21
- "--eval_steps=1000",
22
- "--save_strategy=steps",
23
- "--save_steps=1000",
24
- "--generation_max_length=225",
25
- "--length_column_name=input_length",
26
- "--max_duration_in_seconds=30",
27
- "--text_column_name=sentence",
28
- "--freeze_feature_encoder=False",
29
- "--report_to=tensorboard",
30
- "--metric_for_best_model=wer",
31
- "--greater_is_better=False",
32
- "--load_best_model_at_end",
33
- "--gradient_checkpointing",
34
- "--fp16",
35
- "--overwrite_output_dir",
36
- "--do_train",
37
- "--do_eval",
38
- "--predict_with_generate",
39
- "--do_normalize_eval",
40
- "--streaming",
41
- "--use_auth_token",
42
- "--push_to_hub",
43
- "--report_to",
44
- "wandb",
45
- "--run_name",
46
- "whisper-small-eu"
47
- ],
48
- "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
- "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
- "git": {
51
- "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
- "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
- },
54
- "email": "[email protected]",
55
- "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
- "host": "tknika",
57
- "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
- "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
- "cpu_count": 8,
60
- "cpu_count_logical": 8,
61
- "gpu": "NVIDIA L40-48Q",
62
- "gpu_count": 1,
63
- "disk": {
64
- "/": {
65
- "total": "525987168256",
66
- "used": "297346666496"
67
- }
68
- },
69
- "memory": {
70
- "total": "33654022144"
71
- },
72
- "cpu": {
73
- "count": 8,
74
- "countLogical": 8
75
- },
76
- "gpu_nvidia": [
77
- {
78
- "name": "NVIDIA L40-48Q",
79
- "memoryTotal": "51539607552",
80
- "cudaCores": 18176,
81
- "architecture": "Ada"
82
- }
83
- ],
84
- "cudaVersion": "12.4"
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":0}}
 
 
wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
- {"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110}
3
- {"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}}
4
- {"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"}
5
- {"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
6
- {"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
7
- {"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"}
8
- {"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"}
9
- {"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"}
10
- {"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"}
11
- {"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"}
12
- {"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"}
13
- {"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"}
14
- {"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log DELETED
@@ -1,15 +0,0 @@
1
- {"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"}
2
- {"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"}
3
- {"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"}
4
- {"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"}
5
- {"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"}
6
- {"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"}
7
- {"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"}
8
- {"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"}
9
- {"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"}
10
- {"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"}
11
- {"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
- {"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"}
13
- {"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"}
14
- {"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"}
15
- {"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122637-v3d3ouvn/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110
3
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
7
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
8
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():756] calling init triggers
9
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
- config: {'_wandb': {}}
11
- 2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():789] starting backend
12
- 2025-02-12 12:26:37,272 INFO MainThread:224110 [wandb_init.py:init():793] sending inform_init request
13
- 2025-02-12 12:26:37,277 INFO MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-02-12 12:26:37,277 INFO MainThread:224110 [wandb_init.py:init():808] backend started and connected
15
- 2025-02-12 12:26:37,279 INFO MainThread:224110 [wandb_init.py:init():901] updated telemetry
16
- 2025-02-12 12:26:37,285 INFO MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
- 2025-02-12 12:26:37,653 INFO MainThread:224110 [wandb_init.py:init():994] starting run threads in backend
18
- 2025-02-12 12:26:37,764 INFO MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg
19
- 2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
- 2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
- 2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed.
22
- 2025-02-12 12:26:37,766 INFO MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process
23
- 2025-02-12 12:26:37,767 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
- 2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7a5cbc15a330>>
25
- 2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
- 2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb DELETED
Binary file (11.3 kB)
 
wandb/run-20250212_122854-4m048f5s/files/config.yaml DELETED
@@ -1,512 +0,0 @@
1
- _attn_implementation_autoset:
2
- value: true
3
- _name_or_path:
4
- value: openai/whisper-small
5
- _wandb:
6
- value:
7
- cli_version: 0.19.6
8
- m:
9
- - "1": train/global_step
10
- "6":
11
- - 3
12
- "7": []
13
- python_version: 3.12.3
14
- t:
15
- "1":
16
- - 1
17
- - 5
18
- - 11
19
- - 49
20
- - 51
21
- - 53
22
- - 55
23
- - 71
24
- - 100
25
- "2":
26
- - 1
27
- - 5
28
- - 11
29
- - 49
30
- - 51
31
- - 53
32
- - 55
33
- - 71
34
- - 100
35
- "3":
36
- - 7
37
- - 13
38
- - 19
39
- - 23
40
- - 55
41
- - 66
42
- "4": 3.12.3
43
- "5": 0.19.6
44
- "6": 4.49.0.dev0
45
- "8":
46
- - 5
47
- "9":
48
- "1": transformers_trainer
49
- "12": 0.19.6
50
- "13": linux-x86_64
51
- accelerator_config:
52
- value:
53
- dispatch_batches: null
54
- even_batches: true
55
- gradient_accumulation_kwargs: null
56
- non_blocking: false
57
- split_batches: false
58
- use_seedable_sampler: true
59
- activation_dropout:
60
- value: 0
61
- activation_function:
62
- value: gelu
63
- adafactor:
64
- value: false
65
- adam_beta1:
66
- value: 0.9
67
- adam_beta2:
68
- value: 0.999
69
- adam_epsilon:
70
- value: 1e-08
71
- add_cross_attention:
72
- value: false
73
- apply_spec_augment:
74
- value: false
75
- architectures:
76
- value:
77
- - WhisperForConditionalGeneration
78
- attention_dropout:
79
- value: 0
80
- auto_find_batch_size:
81
- value: false
82
- average_tokens_across_devices:
83
- value: false
84
- bad_words_ids:
85
- value: null
86
- batch_eval_metrics:
87
- value: false
88
- begin_suppress_tokens:
89
- value:
90
- - 220
91
- - 50257
92
- bf16:
93
- value: false
94
- bf16_full_eval:
95
- value: false
96
- bos_token_id:
97
- value: 50257
98
- chunk_size_feed_forward:
99
- value: 0
100
- classifier_proj_size:
101
- value: 256
102
- cross_attention_hidden_size:
103
- value: null
104
- d_model:
105
- value: 768
106
- data_seed:
107
- value: null
108
- dataloader_drop_last:
109
- value: false
110
- dataloader_num_workers:
111
- value: 0
112
- dataloader_persistent_workers:
113
- value: false
114
- dataloader_pin_memory:
115
- value: true
116
- dataloader_prefetch_factor:
117
- value: null
118
- ddp_backend:
119
- value: null
120
- ddp_broadcast_buffers:
121
- value: null
122
- ddp_bucket_cap_mb:
123
- value: null
124
- ddp_find_unused_parameters:
125
- value: null
126
- ddp_timeout:
127
- value: 1800
128
- debug:
129
- value: []
130
- decoder_attention_heads:
131
- value: 12
132
- decoder_ffn_dim:
133
- value: 3072
134
- decoder_layerdrop:
135
- value: 0
136
- decoder_layers:
137
- value: 12
138
- decoder_start_token_id:
139
- value: 50258
140
- deepspeed:
141
- value: null
142
- disable_tqdm:
143
- value: false
144
- dispatch_batches:
145
- value: null
146
- diversity_penalty:
147
- value: 0
148
- do_eval:
149
- value: true
150
- do_predict:
151
- value: false
152
- do_sample:
153
- value: false
154
- do_train:
155
- value: true
156
- dropout:
157
- value: 0
158
- early_stopping:
159
- value: false
160
- encoder_attention_heads:
161
- value: 12
162
- encoder_ffn_dim:
163
- value: 3072
164
- encoder_layerdrop:
165
- value: 0
166
- encoder_layers:
167
- value: 12
168
- encoder_no_repeat_ngram_size:
169
- value: 0
170
- eos_token_id:
171
- value: 50257
172
- eval_accumulation_steps:
173
- value: null
174
- eval_delay:
175
- value: 0
176
- eval_do_concat_batches:
177
- value: true
178
- eval_on_start:
179
- value: false
180
- eval_steps:
181
- value: 1000
182
- eval_strategy:
183
- value: steps
184
- eval_use_gather_object:
185
- value: false
186
- evaluation_strategy:
187
- value: steps
188
- exponential_decay_length_penalty:
189
- value: null
190
- finetuning_task:
191
- value: null
192
- forced_bos_token_id:
193
- value: null
194
- forced_decoder_ids:
195
- value: null
196
- forced_eos_token_id:
197
- value: null
198
- fp16:
199
- value: true
200
- fp16_backend:
201
- value: auto
202
- fp16_full_eval:
203
- value: false
204
- fp16_opt_level:
205
- value: O1
206
- fsdp:
207
- value: []
208
- fsdp_config:
209
- value:
210
- min_num_params: 0
211
- xla: false
212
- xla_fsdp_grad_ckpt: false
213
- xla_fsdp_v2: false
214
- fsdp_min_num_params:
215
- value: 0
216
- fsdp_transformer_layer_cls_to_wrap:
217
- value: null
218
- full_determinism:
219
- value: false
220
- generation_config:
221
- value: null
222
- generation_max_length:
223
- value: 225
224
- generation_num_beams:
225
- value: null
226
- gradient_accumulation_steps:
227
- value: 1
228
- gradient_checkpointing:
229
- value: true
230
- gradient_checkpointing_kwargs:
231
- value: null
232
- greater_is_better:
233
- value: false
234
- group_by_length:
235
- value: false
236
- half_precision_backend:
237
- value: auto
238
- hub_always_push:
239
- value: false
240
- hub_model_id:
241
- value: null
242
- hub_private_repo:
243
- value: null
244
- hub_strategy:
245
- value: every_save
246
- hub_token:
247
- value: <HUB_TOKEN>
248
- id2label:
249
- value:
250
- "0": LABEL_0
251
- "1": LABEL_1
252
- ignore_data_skip:
253
- value: false
254
- include_for_metrics:
255
- value: []
256
- include_inputs_for_metrics:
257
- value: false
258
- include_num_input_tokens_seen:
259
- value: false
260
- include_tokens_per_second:
261
- value: false
262
- init_std:
263
- value: 0.02
264
- is_decoder:
265
- value: false
266
- is_encoder_decoder:
267
- value: true
268
- jit_mode_eval:
269
- value: false
270
- label_names:
271
- value: null
272
- label_smoothing_factor:
273
- value: 0
274
- label2id:
275
- value:
276
- LABEL_0: 0
277
- LABEL_1: 1
278
- learning_rate:
279
- value: 1e-05
280
- length_column_name:
281
- value: input_length
282
- length_penalty:
283
- value: 1
284
- load_best_model_at_end:
285
- value: true
286
- local_rank:
287
- value: 0
288
- log_level:
289
- value: passive
290
- log_level_replica:
291
- value: warning
292
- log_on_each_node:
293
- value: true
294
- logging_dir:
295
- value: ./runs/Feb12_12-28-29_tknika
296
- logging_first_step:
297
- value: false
298
- logging_nan_inf_filter:
299
- value: true
300
- logging_steps:
301
- value: 25
302
- logging_strategy:
303
- value: steps
304
- lr_scheduler_type:
305
- value: linear
306
- mask_feature_length:
307
- value: 10
308
- mask_feature_min_masks:
309
- value: 0
310
- mask_feature_prob:
311
- value: 0
312
- mask_time_length:
313
- value: 10
314
- mask_time_min_masks:
315
- value: 2
316
- mask_time_prob:
317
- value: 0.05
318
- max_grad_norm:
319
- value: 1
320
- max_length:
321
- value: 448
322
- max_source_positions:
323
- value: 1500
324
- max_steps:
325
- value: 8000
326
- max_target_positions:
327
- value: 448
328
- median_filter_width:
329
- value: 7
330
- metric_for_best_model:
331
- value: wer
332
- min_length:
333
- value: 0
334
- model/num_parameters:
335
- value: 241734912
336
- model_type:
337
- value: whisper
338
- mp_parameters:
339
- value: ""
340
- neftune_noise_alpha:
341
- value: null
342
- no_cuda:
343
- value: false
344
- no_repeat_ngram_size:
345
- value: 0
346
- num_beam_groups:
347
- value: 1
348
- num_beams:
349
- value: 1
350
- num_hidden_layers:
351
- value: 12
352
- num_mel_bins:
353
- value: 80
354
- num_return_sequences:
355
- value: 1
356
- num_train_epochs:
357
- value: 3
358
- optim:
359
- value: adamw_torch
360
- optim_args:
361
- value: null
362
- optim_target_modules:
363
- value: null
364
- output_attentions:
365
- value: false
366
- output_dir:
367
- value: ./
368
- output_hidden_states:
369
- value: false
370
- output_scores:
371
- value: false
372
- overwrite_output_dir:
373
- value: true
374
- pad_token_id:
375
- value: 50257
376
- past_index:
377
- value: -1
378
- per_device_eval_batch_size:
379
- value: 16
380
- per_device_train_batch_size:
381
- value: 32
382
- per_gpu_eval_batch_size:
383
- value: null
384
- per_gpu_train_batch_size:
385
- value: null
386
- predict_with_generate:
387
- value: true
388
- prediction_loss_only:
389
- value: false
390
- prefix:
391
- value: null
392
- problem_type:
393
- value: null
394
- push_to_hub:
395
- value: true
396
- push_to_hub_model_id:
397
- value: null
398
- push_to_hub_organization:
399
- value: null
400
- push_to_hub_token:
401
- value: <PUSH_TO_HUB_TOKEN>
402
- ray_scope:
403
- value: last
404
- remove_invalid_values:
405
- value: false
406
- remove_unused_columns:
407
- value: true
408
- repetition_penalty:
409
- value: 1
410
- report_to:
411
- value:
412
- - wandb
413
- restore_callback_states_from_checkpoint:
414
- value: false
415
- resume_from_checkpoint:
416
- value: null
417
- return_dict:
418
- value: true
419
- return_dict_in_generate:
420
- value: false
421
- run_name:
422
- value: whisper-small-eu
423
- save_on_each_node:
424
- value: false
425
- save_only_model:
426
- value: false
427
- save_safetensors:
428
- value: true
429
- save_steps:
430
- value: 1000
431
- save_strategy:
432
- value: steps
433
- save_total_limit:
434
- value: null
435
- scale_embedding:
436
- value: false
437
- seed:
438
- value: 42
439
- sep_token_id:
440
- value: null
441
- skip_memory_metrics:
442
- value: true
443
- sortish_sampler:
444
- value: false
445
- split_batches:
446
- value: null
447
- suppress_tokens:
448
- value: null
449
- task_specific_params:
450
- value: null
451
- temperature:
452
- value: 1
453
- tf_legacy_loss:
454
- value: false
455
- tf32:
456
- value: null
457
- tie_encoder_decoder:
458
- value: false
459
- tie_word_embeddings:
460
- value: true
461
- tokenizer_class:
462
- value: null
463
- top_k:
464
- value: 50
465
- top_p:
466
- value: 1
467
- torch_compile:
468
- value: false
469
- torch_compile_backend:
470
- value: null
471
- torch_compile_mode:
472
- value: null
473
- torch_dtype:
474
- value: float32
475
- torch_empty_cache_steps:
476
- value: null
477
- torchdynamo:
478
- value: null
479
- torchscript:
480
- value: false
481
- tpu_metrics_debug:
482
- value: false
483
- tpu_num_cores:
484
- value: null
485
- transformers_version:
486
- value: 4.49.0.dev0
487
- typical_p:
488
- value: 1
489
- use_bfloat16:
490
- value: false
491
- use_cache:
492
- value: false
493
- use_cpu:
494
- value: false
495
- use_ipex:
496
- value: false
497
- use_legacy_prediction_loop:
498
- value: false
499
- use_liger_kernel:
500
- value: false
501
- use_mps_device:
502
- value: false
503
- use_weighted_layer_sum:
504
- value: false
505
- vocab_size:
506
- value: 51865
507
- warmup_ratio:
508
- value: 0
509
- warmup_steps:
510
- value: 500
511
- weight_decay:
512
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122854-4m048f5s/files/output.log DELETED
@@ -1,22 +0,0 @@
1
- 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
- main()
4
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
- train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
- return inner_training_loop(
9
- ^^^^^^^^^^^^^^^^^^^^
10
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
- self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
- return self.call_event("on_epoch_begin", args, state, control)
15
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
- result = getattr(callback, event)(
18
- ^^^^^^^^^^^^^^^^^^^^^^^^^
19
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
- if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
- ^^^^^^^^^^^^^^^^^^^^^^^^
22
- AttributeError: 'NoneType' object has no attribute 'dataset'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122854-4m048f5s/files/requirements.txt DELETED
@@ -1,115 +0,0 @@
1
- aiosignal==1.3.2
2
- Markdown==3.7
3
- more-itertools==10.6.0
4
- requests==2.32.3
5
- sentry-sdk==2.21.0
6
- torchaudio==2.6.0
7
- charset-normalizer==3.4.1
8
- docker-pycreds==0.4.0
9
- nvidia-cusolver-cu12==11.6.1.9
10
- PyYAML==6.0.2
11
- librosa==0.10.2.post1
12
- soxr==0.5.0.post1
13
- multiprocess==0.70.16
14
- setuptools==75.8.0
15
- nvidia-cufft-cu12==11.2.1.3
16
- joblib==1.4.2
17
- pytz==2025.1
18
- pip==24.0
19
- scikit-learn==1.6.1
20
- certifi==2025.1.31
21
- jiwer==3.1.0
22
- regex==2024.11.6
23
- annotated-types==0.7.0
24
- grpcio==1.70.0
25
- msgpack==1.1.0
26
- mpmath==1.3.0
27
- nvidia-cudnn-cu12==9.1.0.70
28
- soundfile==0.13.1
29
- dill==0.3.8
30
- nvidia-nvtx-cu12==12.4.127
31
- six==1.17.0
32
- nvidia-cuda-cupti-cu12==12.4.127
33
- pyarrow==19.0.0
34
- nvidia-nccl-cu12==2.21.5
35
- psutil==6.1.1
36
- decorator==5.1.1
37
- llvmlite==0.44.0
38
- frozenlist==1.5.0
39
- pydantic==2.10.6
40
- networkx==3.4.2
41
- idna==3.10
42
- wandb==0.19.6
43
- aiohttp==3.11.12
44
- RapidFuzz==3.12.1
45
- pandas==2.2.3
46
- python-dateutil==2.9.0.post0
47
- numpy==2.1.3
48
- tokenizers==0.21.0
49
- nvidia-cusparselt-cu12==0.6.2
50
- typing_extensions==4.12.2
51
- urllib3==2.3.0
52
- setproctitle==1.3.4
53
- tzdata==2025.1
54
- sympy==1.13.1
55
- pooch==1.8.2
56
- click==8.1.8
57
- pydantic_core==2.27.2
58
- MarkupSafe==3.0.2
59
- scipy==1.15.1
60
- accelerate==1.3.0
61
- tensorboard==2.19.0
62
- protobuf==5.29.3
63
- gitdb==4.0.12
64
- smmap==5.0.2
65
- absl-py==2.1.0
66
- tqdm==4.67.1
67
- yarl==1.18.3
68
- pycparser==2.22
69
- nvidia-cusparse-cu12==12.3.1.170
70
- attrs==25.1.0
71
- lazy_loader==0.4
72
- tensorboard-data-server==0.7.2
73
- threadpoolctl==3.5.0
74
- GitPython==3.1.44
75
- safetensors==0.5.2
76
- fsspec==2024.12.0
77
- nvidia-cuda-nvrtc-cu12==12.4.127
78
- filelock==3.17.0
79
- aiohappyeyeballs==2.4.6
80
- packaging==24.2
81
- datasets==3.2.1.dev0
82
- audioread==3.0.1
83
- propcache==0.2.1
84
- transformers==4.49.0.dev0
85
- nvidia-cuda-runtime-cu12==12.4.127
86
- cffi==1.17.1
87
- evaluate==0.4.3
88
- Werkzeug==3.1.3
89
- huggingface-hub==0.28.1
90
- Jinja2==3.1.5
91
- torch==2.6.0
92
- nvidia-curand-cu12==10.3.5.147
93
- xxhash==3.5.0
94
- platformdirs==4.3.6
95
- multidict==6.1.0
96
- nvidia-cublas-cu12==12.4.5.8
97
- nvidia-nvjitlink-cu12==12.4.127
98
- triton==3.2.0
99
- numba==0.61.0
100
- importlib_metadata==8.0.0
101
- platformdirs==4.2.2
102
- typeguard==4.3.0
103
- more-itertools==10.3.0
104
- tomli==2.0.1
105
- autocommand==2.2.2
106
- zipp==3.19.2
107
- typing_extensions==4.12.2
108
- backports.tarfile==1.2.0
109
- inflect==7.3.1
110
- jaraco.text==3.12.1
111
- wheel==0.43.0
112
- packaging==24.2
113
- jaraco.collections==5.1.0
114
- jaraco.functools==4.0.1
115
- jaraco.context==5.3.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
- "python": "CPython 3.12.3",
4
- "startedAt": "2025-02-12T12:28:54.528397Z",
5
- "args": [
6
- "--model_name_or_path=openai/whisper-small",
7
- "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
- "--language=basque",
9
- "--train_split_name=train",
10
- "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
11
- "--model_index_name=Whisper Small Basque",
12
- "--max_steps=8000",
13
- "--output_dir=./",
14
- "--per_device_train_batch_size=32",
15
- "--per_device_eval_batch_size=16",
16
- "--gradient_accumulation_steps=1",
17
- "--logging_steps=25",
18
- "--learning_rate=1e-5",
19
- "--warmup_steps=500",
20
- "--evaluation_strategy=steps",
21
- "--eval_steps=1000",
22
- "--save_strategy=steps",
23
- "--save_steps=1000",
24
- "--generation_max_length=225",
25
- "--length_column_name=input_length",
26
- "--max_duration_in_seconds=30",
27
- "--text_column_name=sentence",
28
- "--freeze_feature_encoder=False",
29
- "--report_to=tensorboard",
30
- "--metric_for_best_model=wer",
31
- "--greater_is_better=False",
32
- "--load_best_model_at_end",
33
- "--gradient_checkpointing",
34
- "--fp16",
35
- "--overwrite_output_dir",
36
- "--do_train",
37
- "--do_eval",
38
- "--predict_with_generate",
39
- "--do_normalize_eval",
40
- "--streaming",
41
- "--use_auth_token",
42
- "--push_to_hub",
43
- "--report_to",
44
- "wandb",
45
- "--run_name",
46
- "whisper-small-eu"
47
- ],
48
- "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
- "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
- "git": {
51
- "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
- "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
- },
54
- "email": "[email protected]",
55
- "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
- "host": "tknika",
57
- "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
- "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
- "cpu_count": 8,
60
- "cpu_count_logical": 8,
61
- "gpu": "NVIDIA L40-48Q",
62
- "gpu_count": 1,
63
- "disk": {
64
- "/": {
65
- "total": "525987168256",
66
- "used": "297346756608"
67
- }
68
- },
69
- "memory": {
70
- "total": "33654022144"
71
- },
72
- "cpu": {
73
- "count": 8,
74
- "countLogical": 8
75
- },
76
- "gpu_nvidia": [
77
- {
78
- "name": "NVIDIA L40-48Q",
79
- "memoryTotal": "51539607552",
80
- "cudaCores": 18176,
81
- "architecture": "Ada"
82
- }
83
- ],
84
- "cudaVersion": "12.4"
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":0}}
 
 
wandb/run-20250212_122854-4m048f5s/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
- {"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528}
3
- {"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}}
4
- {"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"}
5
- {"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"}
6
- {"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"}
7
- {"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"}
8
- {"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"}
9
- {"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"}
10
- {"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"}
11
- {"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"}
12
- {"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"}
13
- {"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"}
14
- {"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log DELETED
@@ -1,15 +0,0 @@
1
- {"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"}
2
- {"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"}
3
- {"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"}
4
- {"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"}
5
- {"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"}
6
- {"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"}
7
- {"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"}
8
- {"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"}
9
- {"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"}
10
- {"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"}
11
- {"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
- {"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"}
13
- {"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"}
14
- {"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"}
15
- {"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122854-4m048f5s/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528
3
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log
7
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
8
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():756] calling init triggers
9
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
- config: {'_wandb': {}}
11
- 2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():789] starting backend
12
- 2025-02-12 12:28:54,521 INFO MainThread:224528 [wandb_init.py:init():793] sending inform_init request
13
- 2025-02-12 12:28:54,527 INFO MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-02-12 12:28:54,528 INFO MainThread:224528 [wandb_init.py:init():808] backend started and connected
15
- 2025-02-12 12:28:54,530 INFO MainThread:224528 [wandb_init.py:init():901] updated telemetry
16
- 2025-02-12 12:28:54,537 INFO MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
- 2025-02-12 12:28:54,883 INFO MainThread:224528 [wandb_init.py:init():994] starting run threads in backend
18
- 2025-02-12 12:28:54,988 INFO MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg
19
- 2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
- 2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
- 2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed.
22
- 2025-02-12 12:28:54,990 INFO MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process
23
- 2025-02-12 12:28:54,991 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
- 2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x71c5f6c57cb0>>
25
- 2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
- 2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb DELETED
Binary file (11.3 kB)
 
wandb/run-20250212_125202-c6xjc1gs/files/config.yaml DELETED
@@ -1,512 +0,0 @@
1
- _attn_implementation_autoset:
2
- value: true
3
- _name_or_path:
4
- value: openai/whisper-small
5
- _wandb:
6
- value:
7
- cli_version: 0.19.6
8
- m:
9
- - "1": train/global_step
10
- "6":
11
- - 3
12
- "7": []
13
- python_version: 3.12.3
14
- t:
15
- "1":
16
- - 1
17
- - 5
18
- - 11
19
- - 49
20
- - 51
21
- - 53
22
- - 55
23
- - 71
24
- - 100
25
- "2":
26
- - 1
27
- - 5
28
- - 11
29
- - 49
30
- - 51
31
- - 53
32
- - 55
33
- - 71
34
- - 100
35
- "3":
36
- - 7
37
- - 13
38
- - 19
39
- - 23
40
- - 55
41
- - 66
42
- "4": 3.12.3
43
- "5": 0.19.6
44
- "6": 4.49.0.dev0
45
- "8":
46
- - 5
47
- "9":
48
- "1": transformers_trainer
49
- "12": 0.19.6
50
- "13": linux-x86_64
51
- accelerator_config:
52
- value:
53
- dispatch_batches: null
54
- even_batches: true
55
- gradient_accumulation_kwargs: null
56
- non_blocking: false
57
- split_batches: false
58
- use_seedable_sampler: true
59
- activation_dropout:
60
- value: 0
61
- activation_function:
62
- value: gelu
63
- adafactor:
64
- value: false
65
- adam_beta1:
66
- value: 0.9
67
- adam_beta2:
68
- value: 0.999
69
- adam_epsilon:
70
- value: 1e-08
71
- add_cross_attention:
72
- value: false
73
- apply_spec_augment:
74
- value: false
75
- architectures:
76
- value:
77
- - WhisperForConditionalGeneration
78
- attention_dropout:
79
- value: 0
80
- auto_find_batch_size:
81
- value: false
82
- average_tokens_across_devices:
83
- value: false
84
- bad_words_ids:
85
- value: null
86
- batch_eval_metrics:
87
- value: false
88
- begin_suppress_tokens:
89
- value:
90
- - 220
91
- - 50257
92
- bf16:
93
- value: false
94
- bf16_full_eval:
95
- value: false
96
- bos_token_id:
97
- value: 50257
98
- chunk_size_feed_forward:
99
- value: 0
100
- classifier_proj_size:
101
- value: 256
102
- cross_attention_hidden_size:
103
- value: null
104
- d_model:
105
- value: 768
106
- data_seed:
107
- value: null
108
- dataloader_drop_last:
109
- value: false
110
- dataloader_num_workers:
111
- value: 0
112
- dataloader_persistent_workers:
113
- value: false
114
- dataloader_pin_memory:
115
- value: true
116
- dataloader_prefetch_factor:
117
- value: null
118
- ddp_backend:
119
- value: null
120
- ddp_broadcast_buffers:
121
- value: null
122
- ddp_bucket_cap_mb:
123
- value: null
124
- ddp_find_unused_parameters:
125
- value: null
126
- ddp_timeout:
127
- value: 1800
128
- debug:
129
- value: []
130
- decoder_attention_heads:
131
- value: 12
132
- decoder_ffn_dim:
133
- value: 3072
134
- decoder_layerdrop:
135
- value: 0
136
- decoder_layers:
137
- value: 12
138
- decoder_start_token_id:
139
- value: 50258
140
- deepspeed:
141
- value: null
142
- disable_tqdm:
143
- value: false
144
- dispatch_batches:
145
- value: null
146
- diversity_penalty:
147
- value: 0
148
- do_eval:
149
- value: true
150
- do_predict:
151
- value: false
152
- do_sample:
153
- value: false
154
- do_train:
155
- value: true
156
- dropout:
157
- value: 0
158
- early_stopping:
159
- value: false
160
- encoder_attention_heads:
161
- value: 12
162
- encoder_ffn_dim:
163
- value: 3072
164
- encoder_layerdrop:
165
- value: 0
166
- encoder_layers:
167
- value: 12
168
- encoder_no_repeat_ngram_size:
169
- value: 0
170
- eos_token_id:
171
- value: 50257
172
- eval_accumulation_steps:
173
- value: null
174
- eval_delay:
175
- value: 0
176
- eval_do_concat_batches:
177
- value: true
178
- eval_on_start:
179
- value: false
180
- eval_steps:
181
- value: 1000
182
- eval_strategy:
183
- value: steps
184
- eval_use_gather_object:
185
- value: false
186
- evaluation_strategy:
187
- value: steps
188
- exponential_decay_length_penalty:
189
- value: null
190
- finetuning_task:
191
- value: null
192
- forced_bos_token_id:
193
- value: null
194
- forced_decoder_ids:
195
- value: null
196
- forced_eos_token_id:
197
- value: null
198
- fp16:
199
- value: true
200
- fp16_backend:
201
- value: auto
202
- fp16_full_eval:
203
- value: false
204
- fp16_opt_level:
205
- value: O1
206
- fsdp:
207
- value: []
208
- fsdp_config:
209
- value:
210
- min_num_params: 0
211
- xla: false
212
- xla_fsdp_grad_ckpt: false
213
- xla_fsdp_v2: false
214
- fsdp_min_num_params:
215
- value: 0
216
- fsdp_transformer_layer_cls_to_wrap:
217
- value: null
218
- full_determinism:
219
- value: false
220
- generation_config:
221
- value: null
222
- generation_max_length:
223
- value: 225
224
- generation_num_beams:
225
- value: null
226
- gradient_accumulation_steps:
227
- value: 1
228
- gradient_checkpointing:
229
- value: true
230
- gradient_checkpointing_kwargs:
231
- value: null
232
- greater_is_better:
233
- value: false
234
- group_by_length:
235
- value: false
236
- half_precision_backend:
237
- value: auto
238
- hub_always_push:
239
- value: false
240
- hub_model_id:
241
- value: null
242
- hub_private_repo:
243
- value: null
244
- hub_strategy:
245
- value: every_save
246
- hub_token:
247
- value: <HUB_TOKEN>
248
- id2label:
249
- value:
250
- "0": LABEL_0
251
- "1": LABEL_1
252
- ignore_data_skip:
253
- value: false
254
- include_for_metrics:
255
- value: []
256
- include_inputs_for_metrics:
257
- value: false
258
- include_num_input_tokens_seen:
259
- value: false
260
- include_tokens_per_second:
261
- value: false
262
- init_std:
263
- value: 0.02
264
- is_decoder:
265
- value: false
266
- is_encoder_decoder:
267
- value: true
268
- jit_mode_eval:
269
- value: false
270
- label_names:
271
- value: null
272
- label_smoothing_factor:
273
- value: 0
274
- label2id:
275
- value:
276
- LABEL_0: 0
277
- LABEL_1: 1
278
- learning_rate:
279
- value: 1e-05
280
- length_column_name:
281
- value: input_length
282
- length_penalty:
283
- value: 1
284
- load_best_model_at_end:
285
- value: true
286
- local_rank:
287
- value: 0
288
- log_level:
289
- value: passive
290
- log_level_replica:
291
- value: warning
292
- log_on_each_node:
293
- value: true
294
- logging_dir:
295
- value: ./runs/Feb12_12-51-48_tknika
296
- logging_first_step:
297
- value: false
298
- logging_nan_inf_filter:
299
- value: true
300
- logging_steps:
301
- value: 25
302
- logging_strategy:
303
- value: steps
304
- lr_scheduler_type:
305
- value: linear
306
- mask_feature_length:
307
- value: 10
308
- mask_feature_min_masks:
309
- value: 0
310
- mask_feature_prob:
311
- value: 0
312
- mask_time_length:
313
- value: 10
314
- mask_time_min_masks:
315
- value: 2
316
- mask_time_prob:
317
- value: 0.05
318
- max_grad_norm:
319
- value: 1
320
- max_length:
321
- value: 448
322
- max_source_positions:
323
- value: 1500
324
- max_steps:
325
- value: 8000
326
- max_target_positions:
327
- value: 448
328
- median_filter_width:
329
- value: 7
330
- metric_for_best_model:
331
- value: wer
332
- min_length:
333
- value: 0
334
- model/num_parameters:
335
- value: 241734912
336
- model_type:
337
- value: whisper
338
- mp_parameters:
339
- value: ""
340
- neftune_noise_alpha:
341
- value: null
342
- no_cuda:
343
- value: false
344
- no_repeat_ngram_size:
345
- value: 0
346
- num_beam_groups:
347
- value: 1
348
- num_beams:
349
- value: 1
350
- num_hidden_layers:
351
- value: 12
352
- num_mel_bins:
353
- value: 80
354
- num_return_sequences:
355
- value: 1
356
- num_train_epochs:
357
- value: 3
358
- optim:
359
- value: adamw_torch
360
- optim_args:
361
- value: null
362
- optim_target_modules:
363
- value: null
364
- output_attentions:
365
- value: false
366
- output_dir:
367
- value: ./
368
- output_hidden_states:
369
- value: false
370
- output_scores:
371
- value: false
372
- overwrite_output_dir:
373
- value: true
374
- pad_token_id:
375
- value: 50257
376
- past_index:
377
- value: -1
378
- per_device_eval_batch_size:
379
- value: 16
380
- per_device_train_batch_size:
381
- value: 32
382
- per_gpu_eval_batch_size:
383
- value: null
384
- per_gpu_train_batch_size:
385
- value: null
386
- predict_with_generate:
387
- value: true
388
- prediction_loss_only:
389
- value: false
390
- prefix:
391
- value: null
392
- problem_type:
393
- value: null
394
- push_to_hub:
395
- value: true
396
- push_to_hub_model_id:
397
- value: null
398
- push_to_hub_organization:
399
- value: null
400
- push_to_hub_token:
401
- value: <PUSH_TO_HUB_TOKEN>
402
- ray_scope:
403
- value: last
404
- remove_invalid_values:
405
- value: false
406
- remove_unused_columns:
407
- value: true
408
- repetition_penalty:
409
- value: 1
410
- report_to:
411
- value:
412
- - wandb
413
- restore_callback_states_from_checkpoint:
414
- value: false
415
- resume_from_checkpoint:
416
- value: null
417
- return_dict:
418
- value: true
419
- return_dict_in_generate:
420
- value: false
421
- run_name:
422
- value: whisper-small-eu
423
- save_on_each_node:
424
- value: false
425
- save_only_model:
426
- value: false
427
- save_safetensors:
428
- value: true
429
- save_steps:
430
- value: 1000
431
- save_strategy:
432
- value: steps
433
- save_total_limit:
434
- value: null
435
- scale_embedding:
436
- value: false
437
- seed:
438
- value: 42
439
- sep_token_id:
440
- value: null
441
- skip_memory_metrics:
442
- value: true
443
- sortish_sampler:
444
- value: false
445
- split_batches:
446
- value: null
447
- suppress_tokens:
448
- value: null
449
- task_specific_params:
450
- value: null
451
- temperature:
452
- value: 1
453
- tf_legacy_loss:
454
- value: false
455
- tf32:
456
- value: null
457
- tie_encoder_decoder:
458
- value: false
459
- tie_word_embeddings:
460
- value: true
461
- tokenizer_class:
462
- value: null
463
- top_k:
464
- value: 50
465
- top_p:
466
- value: 1
467
- torch_compile:
468
- value: false
469
- torch_compile_backend:
470
- value: null
471
- torch_compile_mode:
472
- value: null
473
- torch_dtype:
474
- value: float32
475
- torch_empty_cache_steps:
476
- value: null
477
- torchdynamo:
478
- value: null
479
- torchscript:
480
- value: false
481
- tpu_metrics_debug:
482
- value: false
483
- tpu_num_cores:
484
- value: null
485
- transformers_version:
486
- value: 4.49.0.dev0
487
- typical_p:
488
- value: 1
489
- use_bfloat16:
490
- value: false
491
- use_cache:
492
- value: false
493
- use_cpu:
494
- value: false
495
- use_ipex:
496
- value: false
497
- use_legacy_prediction_loop:
498
- value: false
499
- use_liger_kernel:
500
- value: false
501
- use_mps_device:
502
- value: false
503
- use_weighted_layer_sum:
504
- value: false
505
- vocab_size:
506
- value: 51865
507
- warmup_ratio:
508
- value: 0
509
- warmup_steps:
510
- value: 500
511
- weight_decay:
512
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125202-c6xjc1gs/files/output.log DELETED
@@ -1,22 +0,0 @@
1
- 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
3
- main()
4
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
5
- train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
- return inner_training_loop(
9
- ^^^^^^^^^^^^^^^^^^^^
10
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
- self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
- return self.call_event("on_epoch_begin", args, state, control)
15
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
- result = getattr(callback, event)(
18
- ^^^^^^^^^^^^^^^^^^^^^^^^^
19
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
20
- if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
- ^^^^^^^^^^^^^^^^^^^^^^^^
22
- AttributeError: 'NoneType' object has no attribute 'dataset'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt DELETED
@@ -1,115 +0,0 @@
1
- aiosignal==1.3.2
2
- Markdown==3.7
3
- more-itertools==10.6.0
4
- requests==2.32.3
5
- sentry-sdk==2.21.0
6
- torchaudio==2.6.0
7
- charset-normalizer==3.4.1
8
- docker-pycreds==0.4.0
9
- nvidia-cusolver-cu12==11.6.1.9
10
- PyYAML==6.0.2
11
- librosa==0.10.2.post1
12
- soxr==0.5.0.post1
13
- multiprocess==0.70.16
14
- setuptools==75.8.0
15
- nvidia-cufft-cu12==11.2.1.3
16
- joblib==1.4.2
17
- pytz==2025.1
18
- pip==24.0
19
- scikit-learn==1.6.1
20
- certifi==2025.1.31
21
- jiwer==3.1.0
22
- regex==2024.11.6
23
- annotated-types==0.7.0
24
- grpcio==1.70.0
25
- msgpack==1.1.0
26
- mpmath==1.3.0
27
- nvidia-cudnn-cu12==9.1.0.70
28
- soundfile==0.13.1
29
- dill==0.3.8
30
- nvidia-nvtx-cu12==12.4.127
31
- six==1.17.0
32
- nvidia-cuda-cupti-cu12==12.4.127
33
- pyarrow==19.0.0
34
- nvidia-nccl-cu12==2.21.5
35
- psutil==6.1.1
36
- decorator==5.1.1
37
- llvmlite==0.44.0
38
- frozenlist==1.5.0
39
- pydantic==2.10.6
40
- networkx==3.4.2
41
- idna==3.10
42
- wandb==0.19.6
43
- aiohttp==3.11.12
44
- RapidFuzz==3.12.1
45
- pandas==2.2.3
46
- python-dateutil==2.9.0.post0
47
- numpy==2.1.3
48
- tokenizers==0.21.0
49
- nvidia-cusparselt-cu12==0.6.2
50
- typing_extensions==4.12.2
51
- urllib3==2.3.0
52
- setproctitle==1.3.4
53
- tzdata==2025.1
54
- sympy==1.13.1
55
- pooch==1.8.2
56
- click==8.1.8
57
- pydantic_core==2.27.2
58
- MarkupSafe==3.0.2
59
- scipy==1.15.1
60
- accelerate==1.3.0
61
- tensorboard==2.19.0
62
- protobuf==5.29.3
63
- gitdb==4.0.12
64
- smmap==5.0.2
65
- absl-py==2.1.0
66
- tqdm==4.67.1
67
- yarl==1.18.3
68
- pycparser==2.22
69
- nvidia-cusparse-cu12==12.3.1.170
70
- attrs==25.1.0
71
- lazy_loader==0.4
72
- tensorboard-data-server==0.7.2
73
- threadpoolctl==3.5.0
74
- GitPython==3.1.44
75
- safetensors==0.5.2
76
- fsspec==2024.12.0
77
- nvidia-cuda-nvrtc-cu12==12.4.127
78
- filelock==3.17.0
79
- aiohappyeyeballs==2.4.6
80
- packaging==24.2
81
- datasets==3.2.1.dev0
82
- audioread==3.0.1
83
- propcache==0.2.1
84
- transformers==4.49.0.dev0
85
- nvidia-cuda-runtime-cu12==12.4.127
86
- cffi==1.17.1
87
- evaluate==0.4.3
88
- Werkzeug==3.1.3
89
- huggingface-hub==0.28.1
90
- Jinja2==3.1.5
91
- torch==2.6.0
92
- nvidia-curand-cu12==10.3.5.147
93
- xxhash==3.5.0
94
- platformdirs==4.3.6
95
- multidict==6.1.0
96
- nvidia-cublas-cu12==12.4.5.8
97
- nvidia-nvjitlink-cu12==12.4.127
98
- triton==3.2.0
99
- numba==0.61.0
100
- importlib_metadata==8.0.0
101
- platformdirs==4.2.2
102
- typeguard==4.3.0
103
- more-itertools==10.3.0
104
- tomli==2.0.1
105
- autocommand==2.2.2
106
- zipp==3.19.2
107
- typing_extensions==4.12.2
108
- backports.tarfile==1.2.0
109
- inflect==7.3.1
110
- jaraco.text==3.12.1
111
- wheel==0.43.0
112
- packaging==24.2
113
- jaraco.collections==5.1.0
114
- jaraco.functools==4.0.1
115
- jaraco.context==5.3.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
- "python": "CPython 3.12.3",
4
- "startedAt": "2025-02-12T12:52:03.105234Z",
5
- "args": [
6
- "--model_name_or_path=openai/whisper-small",
7
- "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
- "--language=basque",
9
- "--train_split_name=train",
10
- "--eval_split_name=test_parl",
11
- "--model_index_name=Whisper Small Basque",
12
- "--max_steps=8000",
13
- "--output_dir=./",
14
- "--per_device_train_batch_size=32",
15
- "--per_device_eval_batch_size=16",
16
- "--gradient_accumulation_steps=1",
17
- "--logging_steps=25",
18
- "--learning_rate=1e-5",
19
- "--warmup_steps=500",
20
- "--evaluation_strategy=steps",
21
- "--eval_steps=1000",
22
- "--save_strategy=steps",
23
- "--save_steps=1000",
24
- "--generation_max_length=225",
25
- "--length_column_name=input_length",
26
- "--max_duration_in_seconds=30",
27
- "--text_column_name=sentence",
28
- "--freeze_feature_encoder=False",
29
- "--report_to=tensorboard",
30
- "--metric_for_best_model=wer",
31
- "--greater_is_better=False",
32
- "--load_best_model_at_end",
33
- "--gradient_checkpointing",
34
- "--fp16",
35
- "--overwrite_output_dir",
36
- "--do_train",
37
- "--do_eval",
38
- "--predict_with_generate",
39
- "--do_normalize_eval",
40
- "--streaming",
41
- "--use_auth_token",
42
- "--push_to_hub",
43
- "--report_to",
44
- "wandb",
45
- "--run_name",
46
- "whisper-small-eu"
47
- ],
48
- "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
- "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
- "git": {
51
- "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
- "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
- },
54
- "email": "[email protected]",
55
- "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
- "host": "tknika",
57
- "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
- "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
- "cpu_count": 8,
60
- "cpu_count_logical": 8,
61
- "gpu": "NVIDIA L40-48Q",
62
- "gpu_count": 1,
63
- "disk": {
64
- "/": {
65
- "total": "525987168256",
66
- "used": "313777016832"
67
- }
68
- },
69
- "memory": {
70
- "total": "33654022144"
71
- },
72
- "cpu": {
73
- "count": 8,
74
- "countLogical": 8
75
- },
76
- "gpu_nvidia": [
77
- {
78
- "name": "NVIDIA L40-48Q",
79
- "memoryTotal": "51539607552",
80
- "cudaCores": 18176,
81
- "architecture": "Ada"
82
- }
83
- ],
84
- "cudaVersion": "12.4"
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":0}}
 
 
wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
- {"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112}
3
- {"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}}
4
- {"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"}
5
- {"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
6
- {"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
7
- {"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"}
8
- {"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"}
9
- {"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"}
10
- {"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"}
11
- {"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"}
12
- {"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"}
13
- {"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"}
14
- {"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log DELETED
@@ -1,15 +0,0 @@
1
- {"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"}
2
- {"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"}
3
- {"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"}
4
- {"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"}
5
- {"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"}
6
- {"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"}
7
- {"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"}
8
- {"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"}
9
- {"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"}
10
- {"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"}
11
- {"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
- {"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"}
13
- {"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"}
14
- {"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"}
15
- {"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125202-c6xjc1gs/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-02-12 12:52:02,886 INFO MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112
3
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
7
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
8
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():756] calling init triggers
9
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
- config: {'_wandb': {}}
11
- 2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():789] starting backend
12
- 2025-02-12 12:52:03,097 INFO MainThread:226112 [wandb_init.py:init():793] sending inform_init request
13
- 2025-02-12 12:52:03,104 INFO MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-02-12 12:52:03,104 INFO MainThread:226112 [wandb_init.py:init():808] backend started and connected
15
- 2025-02-12 12:52:03,107 INFO MainThread:226112 [wandb_init.py:init():901] updated telemetry
16
- 2025-02-12 12:52:03,114 INFO MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
- 2025-02-12 12:52:03,483 INFO MainThread:226112 [wandb_init.py:init():994] starting run threads in backend
18
- 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg
19
- 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
- 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
- 2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed.
22
- 2025-02-12 12:52:03,568 INFO MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process
23
- 2025-02-12 12:52:03,569 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
- 2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7d4830f2ddf0>>
25
- 2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
- 2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb DELETED
Binary file (11.3 kB)
 
wandb/run-20250212_125924-xhsgsxqq/files/config.yaml DELETED
@@ -1,512 +0,0 @@
1
- _attn_implementation_autoset:
2
- value: true
3
- _name_or_path:
4
- value: openai/whisper-small
5
- _wandb:
6
- value:
7
- cli_version: 0.19.6
8
- m:
9
- - "1": train/global_step
10
- "6":
11
- - 3
12
- "7": []
13
- python_version: 3.12.3
14
- t:
15
- "1":
16
- - 1
17
- - 5
18
- - 11
19
- - 49
20
- - 51
21
- - 53
22
- - 55
23
- - 71
24
- - 100
25
- "2":
26
- - 1
27
- - 5
28
- - 11
29
- - 49
30
- - 51
31
- - 53
32
- - 55
33
- - 71
34
- - 100
35
- "3":
36
- - 7
37
- - 13
38
- - 19
39
- - 23
40
- - 55
41
- - 66
42
- "4": 3.12.3
43
- "5": 0.19.6
44
- "6": 4.49.0.dev0
45
- "8":
46
- - 5
47
- "9":
48
- "1": transformers_trainer
49
- "12": 0.19.6
50
- "13": linux-x86_64
51
- accelerator_config:
52
- value:
53
- dispatch_batches: null
54
- even_batches: true
55
- gradient_accumulation_kwargs: null
56
- non_blocking: false
57
- split_batches: false
58
- use_seedable_sampler: true
59
- activation_dropout:
60
- value: 0
61
- activation_function:
62
- value: gelu
63
- adafactor:
64
- value: false
65
- adam_beta1:
66
- value: 0.9
67
- adam_beta2:
68
- value: 0.999
69
- adam_epsilon:
70
- value: 1e-08
71
- add_cross_attention:
72
- value: false
73
- apply_spec_augment:
74
- value: false
75
- architectures:
76
- value:
77
- - WhisperForConditionalGeneration
78
- attention_dropout:
79
- value: 0
80
- auto_find_batch_size:
81
- value: false
82
- average_tokens_across_devices:
83
- value: false
84
- bad_words_ids:
85
- value: null
86
- batch_eval_metrics:
87
- value: false
88
- begin_suppress_tokens:
89
- value:
90
- - 220
91
- - 50257
92
- bf16:
93
- value: false
94
- bf16_full_eval:
95
- value: false
96
- bos_token_id:
97
- value: 50257
98
- chunk_size_feed_forward:
99
- value: 0
100
- classifier_proj_size:
101
- value: 256
102
- cross_attention_hidden_size:
103
- value: null
104
- d_model:
105
- value: 768
106
- data_seed:
107
- value: null
108
- dataloader_drop_last:
109
- value: false
110
- dataloader_num_workers:
111
- value: 0
112
- dataloader_persistent_workers:
113
- value: false
114
- dataloader_pin_memory:
115
- value: true
116
- dataloader_prefetch_factor:
117
- value: null
118
- ddp_backend:
119
- value: null
120
- ddp_broadcast_buffers:
121
- value: null
122
- ddp_bucket_cap_mb:
123
- value: null
124
- ddp_find_unused_parameters:
125
- value: null
126
- ddp_timeout:
127
- value: 1800
128
- debug:
129
- value: []
130
- decoder_attention_heads:
131
- value: 12
132
- decoder_ffn_dim:
133
- value: 3072
134
- decoder_layerdrop:
135
- value: 0
136
- decoder_layers:
137
- value: 12
138
- decoder_start_token_id:
139
- value: 50258
140
- deepspeed:
141
- value: null
142
- disable_tqdm:
143
- value: false
144
- dispatch_batches:
145
- value: null
146
- diversity_penalty:
147
- value: 0
148
- do_eval:
149
- value: true
150
- do_predict:
151
- value: false
152
- do_sample:
153
- value: false
154
- do_train:
155
- value: true
156
- dropout:
157
- value: 0
158
- early_stopping:
159
- value: false
160
- encoder_attention_heads:
161
- value: 12
162
- encoder_ffn_dim:
163
- value: 3072
164
- encoder_layerdrop:
165
- value: 0
166
- encoder_layers:
167
- value: 12
168
- encoder_no_repeat_ngram_size:
169
- value: 0
170
- eos_token_id:
171
- value: 50257
172
- eval_accumulation_steps:
173
- value: null
174
- eval_delay:
175
- value: 0
176
- eval_do_concat_batches:
177
- value: true
178
- eval_on_start:
179
- value: false
180
- eval_steps:
181
- value: 1000
182
- eval_strategy:
183
- value: steps
184
- eval_use_gather_object:
185
- value: false
186
- evaluation_strategy:
187
- value: steps
188
- exponential_decay_length_penalty:
189
- value: null
190
- finetuning_task:
191
- value: null
192
- forced_bos_token_id:
193
- value: null
194
- forced_decoder_ids:
195
- value: null
196
- forced_eos_token_id:
197
- value: null
198
- fp16:
199
- value: true
200
- fp16_backend:
201
- value: auto
202
- fp16_full_eval:
203
- value: false
204
- fp16_opt_level:
205
- value: O1
206
- fsdp:
207
- value: []
208
- fsdp_config:
209
- value:
210
- min_num_params: 0
211
- xla: false
212
- xla_fsdp_grad_ckpt: false
213
- xla_fsdp_v2: false
214
- fsdp_min_num_params:
215
- value: 0
216
- fsdp_transformer_layer_cls_to_wrap:
217
- value: null
218
- full_determinism:
219
- value: false
220
- generation_config:
221
- value: null
222
- generation_max_length:
223
- value: 225
224
- generation_num_beams:
225
- value: null
226
- gradient_accumulation_steps:
227
- value: 1
228
- gradient_checkpointing:
229
- value: true
230
- gradient_checkpointing_kwargs:
231
- value: null
232
- greater_is_better:
233
- value: false
234
- group_by_length:
235
- value: false
236
- half_precision_backend:
237
- value: auto
238
- hub_always_push:
239
- value: false
240
- hub_model_id:
241
- value: null
242
- hub_private_repo:
243
- value: null
244
- hub_strategy:
245
- value: every_save
246
- hub_token:
247
- value: <HUB_TOKEN>
248
- id2label:
249
- value:
250
- "0": LABEL_0
251
- "1": LABEL_1
252
- ignore_data_skip:
253
- value: false
254
- include_for_metrics:
255
- value: []
256
- include_inputs_for_metrics:
257
- value: false
258
- include_num_input_tokens_seen:
259
- value: false
260
- include_tokens_per_second:
261
- value: false
262
- init_std:
263
- value: 0.02
264
- is_decoder:
265
- value: false
266
- is_encoder_decoder:
267
- value: true
268
- jit_mode_eval:
269
- value: false
270
- label_names:
271
- value: null
272
- label_smoothing_factor:
273
- value: 0
274
- label2id:
275
- value:
276
- LABEL_0: 0
277
- LABEL_1: 1
278
- learning_rate:
279
- value: 1e-05
280
- length_column_name:
281
- value: input_length
282
- length_penalty:
283
- value: 1
284
- load_best_model_at_end:
285
- value: true
286
- local_rank:
287
- value: 0
288
- log_level:
289
- value: passive
290
- log_level_replica:
291
- value: warning
292
- log_on_each_node:
293
- value: true
294
- logging_dir:
295
- value: ./runs/Feb12_12-58-59_tknika
296
- logging_first_step:
297
- value: false
298
- logging_nan_inf_filter:
299
- value: true
300
- logging_steps:
301
- value: 25
302
- logging_strategy:
303
- value: steps
304
- lr_scheduler_type:
305
- value: linear
306
- mask_feature_length:
307
- value: 10
308
- mask_feature_min_masks:
309
- value: 0
310
- mask_feature_prob:
311
- value: 0
312
- mask_time_length:
313
- value: 10
314
- mask_time_min_masks:
315
- value: 2
316
- mask_time_prob:
317
- value: 0.05
318
- max_grad_norm:
319
- value: 1
320
- max_length:
321
- value: 448
322
- max_source_positions:
323
- value: 1500
324
- max_steps:
325
- value: 8000
326
- max_target_positions:
327
- value: 448
328
- median_filter_width:
329
- value: 7
330
- metric_for_best_model:
331
- value: wer
332
- min_length:
333
- value: 0
334
- model/num_parameters:
335
- value: 241734912
336
- model_type:
337
- value: whisper
338
- mp_parameters:
339
- value: ""
340
- neftune_noise_alpha:
341
- value: null
342
- no_cuda:
343
- value: false
344
- no_repeat_ngram_size:
345
- value: 0
346
- num_beam_groups:
347
- value: 1
348
- num_beams:
349
- value: 1
350
- num_hidden_layers:
351
- value: 12
352
- num_mel_bins:
353
- value: 80
354
- num_return_sequences:
355
- value: 1
356
- num_train_epochs:
357
- value: 3
358
- optim:
359
- value: adamw_torch
360
- optim_args:
361
- value: null
362
- optim_target_modules:
363
- value: null
364
- output_attentions:
365
- value: false
366
- output_dir:
367
- value: ./
368
- output_hidden_states:
369
- value: false
370
- output_scores:
371
- value: false
372
- overwrite_output_dir:
373
- value: true
374
- pad_token_id:
375
- value: 50257
376
- past_index:
377
- value: -1
378
- per_device_eval_batch_size:
379
- value: 16
380
- per_device_train_batch_size:
381
- value: 32
382
- per_gpu_eval_batch_size:
383
- value: null
384
- per_gpu_train_batch_size:
385
- value: null
386
- predict_with_generate:
387
- value: true
388
- prediction_loss_only:
389
- value: false
390
- prefix:
391
- value: null
392
- problem_type:
393
- value: null
394
- push_to_hub:
395
- value: true
396
- push_to_hub_model_id:
397
- value: null
398
- push_to_hub_organization:
399
- value: null
400
- push_to_hub_token:
401
- value: <PUSH_TO_HUB_TOKEN>
402
- ray_scope:
403
- value: last
404
- remove_invalid_values:
405
- value: false
406
- remove_unused_columns:
407
- value: true
408
- repetition_penalty:
409
- value: 1
410
- report_to:
411
- value:
412
- - wandb
413
- restore_callback_states_from_checkpoint:
414
- value: false
415
- resume_from_checkpoint:
416
- value: null
417
- return_dict:
418
- value: true
419
- return_dict_in_generate:
420
- value: false
421
- run_name:
422
- value: whisper-small-eu
423
- save_on_each_node:
424
- value: false
425
- save_only_model:
426
- value: false
427
- save_safetensors:
428
- value: true
429
- save_steps:
430
- value: 1000
431
- save_strategy:
432
- value: steps
433
- save_total_limit:
434
- value: null
435
- scale_embedding:
436
- value: false
437
- seed:
438
- value: 42
439
- sep_token_id:
440
- value: null
441
- skip_memory_metrics:
442
- value: true
443
- sortish_sampler:
444
- value: false
445
- split_batches:
446
- value: null
447
- suppress_tokens:
448
- value: null
449
- task_specific_params:
450
- value: null
451
- temperature:
452
- value: 1
453
- tf_legacy_loss:
454
- value: false
455
- tf32:
456
- value: null
457
- tie_encoder_decoder:
458
- value: false
459
- tie_word_embeddings:
460
- value: true
461
- tokenizer_class:
462
- value: null
463
- top_k:
464
- value: 50
465
- top_p:
466
- value: 1
467
- torch_compile:
468
- value: false
469
- torch_compile_backend:
470
- value: null
471
- torch_compile_mode:
472
- value: null
473
- torch_dtype:
474
- value: float32
475
- torch_empty_cache_steps:
476
- value: null
477
- torchdynamo:
478
- value: null
479
- torchscript:
480
- value: false
481
- tpu_metrics_debug:
482
- value: false
483
- tpu_num_cores:
484
- value: null
485
- transformers_version:
486
- value: 4.49.0.dev0
487
- typical_p:
488
- value: 1
489
- use_bfloat16:
490
- value: false
491
- use_cache:
492
- value: false
493
- use_cpu:
494
- value: false
495
- use_ipex:
496
- value: false
497
- use_legacy_prediction_loop:
498
- value: false
499
- use_liger_kernel:
500
- value: false
501
- use_mps_device:
502
- value: false
503
- use_weighted_layer_sum:
504
- value: false
505
- vocab_size:
506
- value: 51865
507
- warmup_ratio:
508
- value: 0
509
- warmup_steps:
510
- value: 500
511
- weight_decay:
512
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125924-xhsgsxqq/files/output.log DELETED
@@ -1,22 +0,0 @@
1
- 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 632, in <module>
3
- main()
4
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main
5
- train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
- return inner_training_loop(
9
- ^^^^^^^^^^^^^^^^^^^^
10
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
- self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
- return self.call_event("on_epoch_begin", args, state, control)
15
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
- result = getattr(callback, event)(
18
- ^^^^^^^^^^^^^^^^^^^^^^^^^
19
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
20
- if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
- ^^^^^^^^^^^^^^^^^^^^^^^^
22
- AttributeError: 'NoneType' object has no attribute 'dataset'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt DELETED
@@ -1,115 +0,0 @@
1
- aiosignal==1.3.2
2
- Markdown==3.7
3
- more-itertools==10.6.0
4
- requests==2.32.3
5
- sentry-sdk==2.21.0
6
- torchaudio==2.6.0
7
- charset-normalizer==3.4.1
8
- docker-pycreds==0.4.0
9
- nvidia-cusolver-cu12==11.6.1.9
10
- PyYAML==6.0.2
11
- librosa==0.10.2.post1
12
- soxr==0.5.0.post1
13
- multiprocess==0.70.16
14
- setuptools==75.8.0
15
- nvidia-cufft-cu12==11.2.1.3
16
- joblib==1.4.2
17
- pytz==2025.1
18
- pip==24.0
19
- scikit-learn==1.6.1
20
- certifi==2025.1.31
21
- jiwer==3.1.0
22
- regex==2024.11.6
23
- annotated-types==0.7.0
24
- grpcio==1.70.0
25
- msgpack==1.1.0
26
- mpmath==1.3.0
27
- nvidia-cudnn-cu12==9.1.0.70
28
- soundfile==0.13.1
29
- dill==0.3.8
30
- nvidia-nvtx-cu12==12.4.127
31
- six==1.17.0
32
- nvidia-cuda-cupti-cu12==12.4.127
33
- pyarrow==19.0.0
34
- nvidia-nccl-cu12==2.21.5
35
- psutil==6.1.1
36
- decorator==5.1.1
37
- llvmlite==0.44.0
38
- frozenlist==1.5.0
39
- pydantic==2.10.6
40
- networkx==3.4.2
41
- idna==3.10
42
- wandb==0.19.6
43
- aiohttp==3.11.12
44
- RapidFuzz==3.12.1
45
- pandas==2.2.3
46
- python-dateutil==2.9.0.post0
47
- numpy==2.1.3
48
- tokenizers==0.21.0
49
- nvidia-cusparselt-cu12==0.6.2
50
- typing_extensions==4.12.2
51
- urllib3==2.3.0
52
- setproctitle==1.3.4
53
- tzdata==2025.1
54
- sympy==1.13.1
55
- pooch==1.8.2
56
- click==8.1.8
57
- pydantic_core==2.27.2
58
- MarkupSafe==3.0.2
59
- scipy==1.15.1
60
- accelerate==1.3.0
61
- tensorboard==2.19.0
62
- protobuf==5.29.3
63
- gitdb==4.0.12
64
- smmap==5.0.2
65
- absl-py==2.1.0
66
- tqdm==4.67.1
67
- yarl==1.18.3
68
- pycparser==2.22
69
- nvidia-cusparse-cu12==12.3.1.170
70
- attrs==25.1.0
71
- lazy_loader==0.4
72
- tensorboard-data-server==0.7.2
73
- threadpoolctl==3.5.0
74
- GitPython==3.1.44
75
- safetensors==0.5.2
76
- fsspec==2024.12.0
77
- nvidia-cuda-nvrtc-cu12==12.4.127
78
- filelock==3.17.0
79
- aiohappyeyeballs==2.4.6
80
- packaging==24.2
81
- datasets==3.2.1.dev0
82
- audioread==3.0.1
83
- propcache==0.2.1
84
- transformers==4.49.0.dev0
85
- nvidia-cuda-runtime-cu12==12.4.127
86
- cffi==1.17.1
87
- evaluate==0.4.3
88
- Werkzeug==3.1.3
89
- huggingface-hub==0.28.1
90
- Jinja2==3.1.5
91
- torch==2.6.0
92
- nvidia-curand-cu12==10.3.5.147
93
- xxhash==3.5.0
94
- platformdirs==4.3.6
95
- multidict==6.1.0
96
- nvidia-cublas-cu12==12.4.5.8
97
- nvidia-nvjitlink-cu12==12.4.127
98
- triton==3.2.0
99
- numba==0.61.0
100
- importlib_metadata==8.0.0
101
- platformdirs==4.2.2
102
- typeguard==4.3.0
103
- more-itertools==10.3.0
104
- tomli==2.0.1
105
- autocommand==2.2.2
106
- zipp==3.19.2
107
- typing_extensions==4.12.2
108
- backports.tarfile==1.2.0
109
- inflect==7.3.1
110
- jaraco.text==3.12.1
111
- wheel==0.43.0
112
- packaging==24.2
113
- jaraco.collections==5.1.0
114
- jaraco.functools==4.0.1
115
- jaraco.context==5.3.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
3
- "python": "CPython 3.12.3",
4
- "startedAt": "2025-02-12T12:59:24.816046Z",
5
- "args": [
6
- "--model_name_or_path=openai/whisper-small",
7
- "--dataset_name=asierhv/composite_corpus_eu_v2.1",
8
- "--language=basque",
9
- "--train_split_name=train",
10
- "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
11
- "--model_index_name=Whisper Small Basque",
12
- "--max_steps=8000",
13
- "--output_dir=./",
14
- "--per_device_train_batch_size=32",
15
- "--per_device_eval_batch_size=16",
16
- "--gradient_accumulation_steps=1",
17
- "--logging_steps=25",
18
- "--learning_rate=1e-5",
19
- "--warmup_steps=500",
20
- "--evaluation_strategy=steps",
21
- "--eval_steps=1000",
22
- "--save_strategy=steps",
23
- "--save_steps=1000",
24
- "--generation_max_length=225",
25
- "--length_column_name=input_length",
26
- "--max_duration_in_seconds=30",
27
- "--text_column_name=sentence",
28
- "--freeze_feature_encoder=False",
29
- "--report_to=tensorboard",
30
- "--metric_for_best_model=wer",
31
- "--greater_is_better=False",
32
- "--load_best_model_at_end",
33
- "--gradient_checkpointing",
34
- "--fp16",
35
- "--overwrite_output_dir",
36
- "--do_train",
37
- "--do_eval",
38
- "--predict_with_generate",
39
- "--do_normalize_eval",
40
- "--streaming",
41
- "--use_auth_token",
42
- "--push_to_hub",
43
- "--report_to",
44
- "wandb",
45
- "--run_name",
46
- "whisper-small-eu"
47
- ],
48
- "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
49
- "codePath": "run_speech_recognition_seq2seq_streaming.py",
50
- "git": {
51
- "remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
52
- "commit": "9c975864b20b4df94398a870e97cad2934253ec3"
53
- },
54
- "email": "[email protected]",
55
- "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
56
- "host": "tknika",
57
- "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
58
- "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
59
- "cpu_count": 8,
60
- "cpu_count_logical": 8,
61
- "gpu": "NVIDIA L40-48Q",
62
- "gpu_count": 1,
63
- "disk": {
64
- "/": {
65
- "total": "525987168256",
66
- "used": "313777115136"
67
- }
68
- },
69
- "memory": {
70
- "total": "33654022144"
71
- },
72
- "cpu": {
73
- "count": 8,
74
- "countLogical": 8
75
- },
76
- "gpu_nvidia": [
77
- {
78
- "name": "NVIDIA L40-48Q",
79
- "memoryTotal": "51539607552",
80
- "cudaCores": 18176,
81
- "architecture": "Ada"
82
- }
83
- ],
84
- "cudaVersion": "12.4"
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":0}}
 
 
wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-02-12T12:59:24.63359638Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa90v2n0h/port-226591.txt","pid":226591,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
- {"time":"2025-02-12T12:59:24.673351851Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226591}
3
- {"time":"2025-02-12T12:59:24.673324591Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41203,"Zone":""}}
4
- {"time":"2025-02-12T12:59:24.809390061Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:53388"}
5
- {"time":"2025-02-12T12:59:24.819517706Z","level":"INFO","msg":"handleInformInit: received","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
6
- {"time":"2025-02-12T12:59:24.923364896Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
7
- {"time":"2025-02-12T12:59:25.341856618Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:53388"}
8
- {"time":"2025-02-12T12:59:25.341962867Z","level":"INFO","msg":"server is shutting down"}
9
- {"time":"2025-02-12T12:59:25.341964847Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:53388"}
10
- {"time":"2025-02-12T12:59:25.342139496Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:53388"}
11
- {"time":"2025-02-12T12:59:25.569637185Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41203->127.0.0.1:53388: use of closed network connection","id":"127.0.0.1:53388"}
12
- {"time":"2025-02-12T12:59:26.643739482Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:53388"}
13
- {"time":"2025-02-12T12:59:26.643783881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:53388"}
14
- {"time":"2025-02-12T12:59:26.643808411Z","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log DELETED
@@ -1,15 +0,0 @@
1
- {"time":"2025-02-12T12:59:24.819689255Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log"}
2
- {"time":"2025-02-12T12:59:24.923303396Z","level":"INFO","msg":"created new stream","id":"xhsgsxqq"}
3
- {"time":"2025-02-12T12:59:24.923354596Z","level":"INFO","msg":"stream: started","id":"xhsgsxqq"}
4
- {"time":"2025-02-12T12:59:24.923472545Z","level":"INFO","msg":"writer: Do: started","stream_id":"xhsgsxqq"}
5
- {"time":"2025-02-12T12:59:24.923494475Z","level":"INFO","msg":"handler: started","stream_id":"xhsgsxqq"}
6
- {"time":"2025-02-12T12:59:24.923560215Z","level":"INFO","msg":"sender: started","stream_id":"xhsgsxqq"}
7
- {"time":"2025-02-12T12:59:25.192419842Z","level":"INFO","msg":"Starting system monitor"}
8
- {"time":"2025-02-12T12:59:25.341944447Z","level":"INFO","msg":"stream: closing","id":"xhsgsxqq"}
9
- {"time":"2025-02-12T12:59:25.341971537Z","level":"INFO","msg":"Stopping system monitor"}
10
- {"time":"2025-02-12T12:59:25.342739283Z","level":"INFO","msg":"Stopped system monitor"}
11
- {"time":"2025-02-12T12:59:26.408412135Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
- {"time":"2025-02-12T12:59:26.643441283Z","level":"INFO","msg":"handler: closed","stream_id":"xhsgsxqq"}
13
- {"time":"2025-02-12T12:59:26.643483513Z","level":"INFO","msg":"writer: Close: closed","stream_id":"xhsgsxqq"}
14
- {"time":"2025-02-12T12:59:26.643525403Z","level":"INFO","msg":"sender: closed","stream_id":"xhsgsxqq"}
15
- {"time":"2025-02-12T12:59:26.643566923Z","level":"INFO","msg":"stream: closed","id":"xhsgsxqq"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125924-xhsgsxqq/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Configure stats pid to 226591
3
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
4
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
5
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
7
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
8
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():756] calling init triggers
9
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
10
- config: {'_wandb': {}}
11
- 2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():789] starting backend
12
- 2025-02-12 12:59:24,809 INFO MainThread:226591 [wandb_init.py:init():793] sending inform_init request
13
- 2025-02-12 12:59:24,815 INFO MainThread:226591 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-02-12 12:59:24,815 INFO MainThread:226591 [wandb_init.py:init():808] backend started and connected
15
- 2025-02-12 12:59:24,818 INFO MainThread:226591 [wandb_init.py:init():901] updated telemetry
16
- 2025-02-12 12:59:24,823 INFO MainThread:226591 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
17
- 2025-02-12 12:59:25,189 INFO MainThread:226591 [wandb_init.py:init():994] starting run threads in backend
18
- 2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_console_start():2385] atexit reg
19
- 2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2235] redirect: wrap_raw
20
- 2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2300] Wrapping output streams.
21
- 2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2325] Redirects installed.
22
- 2025-02-12 12:59:25,303 INFO MainThread:226591 [wandb_init.py:init():1036] run started, returning control to user process
23
- 2025-02-12 12:59:25,304 INFO MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-58-59_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
24
- 2025-02-12 12:59:25,306 INFO MainThread:226591 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x76451d282f30>>
25
- 2025-02-12 12:59:25,306 INFO MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
26
- 2025-02-12 12:59:25,342 WARNING MsgRouterThr:226591 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb DELETED
Binary file (11.3 kB)
 
wandb/run-20250212_130533-zeu6vay4/files/config.yaml DELETED
@@ -1,512 +0,0 @@
1
- _attn_implementation_autoset:
2
- value: true
3
- _name_or_path:
4
- value: openai/whisper-small
5
- _wandb:
6
- value:
7
- cli_version: 0.19.6
8
- m:
9
- - "1": train/global_step
10
- "6":
11
- - 3
12
- "7": []
13
- python_version: 3.12.3
14
- t:
15
- "1":
16
- - 1
17
- - 5
18
- - 11
19
- - 49
20
- - 51
21
- - 53
22
- - 55
23
- - 71
24
- - 100
25
- "2":
26
- - 1
27
- - 5
28
- - 11
29
- - 49
30
- - 51
31
- - 53
32
- - 55
33
- - 71
34
- - 100
35
- "3":
36
- - 7
37
- - 13
38
- - 19
39
- - 23
40
- - 55
41
- - 66
42
- "4": 3.12.3
43
- "5": 0.19.6
44
- "6": 4.49.0.dev0
45
- "8":
46
- - 5
47
- "9":
48
- "1": transformers_trainer
49
- "12": 0.19.6
50
- "13": linux-x86_64
51
- accelerator_config:
52
- value:
53
- dispatch_batches: null
54
- even_batches: true
55
- gradient_accumulation_kwargs: null
56
- non_blocking: false
57
- split_batches: false
58
- use_seedable_sampler: true
59
- activation_dropout:
60
- value: 0
61
- activation_function:
62
- value: gelu
63
- adafactor:
64
- value: false
65
- adam_beta1:
66
- value: 0.9
67
- adam_beta2:
68
- value: 0.999
69
- adam_epsilon:
70
- value: 1e-08
71
- add_cross_attention:
72
- value: false
73
- apply_spec_augment:
74
- value: false
75
- architectures:
76
- value:
77
- - WhisperForConditionalGeneration
78
- attention_dropout:
79
- value: 0
80
- auto_find_batch_size:
81
- value: false
82
- average_tokens_across_devices:
83
- value: false
84
- bad_words_ids:
85
- value: null
86
- batch_eval_metrics:
87
- value: false
88
- begin_suppress_tokens:
89
- value:
90
- - 220
91
- - 50257
92
- bf16:
93
- value: false
94
- bf16_full_eval:
95
- value: false
96
- bos_token_id:
97
- value: 50257
98
- chunk_size_feed_forward:
99
- value: 0
100
- classifier_proj_size:
101
- value: 256
102
- cross_attention_hidden_size:
103
- value: null
104
- d_model:
105
- value: 768
106
- data_seed:
107
- value: null
108
- dataloader_drop_last:
109
- value: false
110
- dataloader_num_workers:
111
- value: 0
112
- dataloader_persistent_workers:
113
- value: false
114
- dataloader_pin_memory:
115
- value: true
116
- dataloader_prefetch_factor:
117
- value: null
118
- ddp_backend:
119
- value: null
120
- ddp_broadcast_buffers:
121
- value: null
122
- ddp_bucket_cap_mb:
123
- value: null
124
- ddp_find_unused_parameters:
125
- value: null
126
- ddp_timeout:
127
- value: 1800
128
- debug:
129
- value: []
130
- decoder_attention_heads:
131
- value: 12
132
- decoder_ffn_dim:
133
- value: 3072
134
- decoder_layerdrop:
135
- value: 0
136
- decoder_layers:
137
- value: 12
138
- decoder_start_token_id:
139
- value: 50258
140
- deepspeed:
141
- value: null
142
- disable_tqdm:
143
- value: false
144
- dispatch_batches:
145
- value: null
146
- diversity_penalty:
147
- value: 0
148
- do_eval:
149
- value: true
150
- do_predict:
151
- value: false
152
- do_sample:
153
- value: false
154
- do_train:
155
- value: true
156
- dropout:
157
- value: 0
158
- early_stopping:
159
- value: false
160
- encoder_attention_heads:
161
- value: 12
162
- encoder_ffn_dim:
163
- value: 3072
164
- encoder_layerdrop:
165
- value: 0
166
- encoder_layers:
167
- value: 12
168
- encoder_no_repeat_ngram_size:
169
- value: 0
170
- eos_token_id:
171
- value: 50257
172
- eval_accumulation_steps:
173
- value: null
174
- eval_delay:
175
- value: 0
176
- eval_do_concat_batches:
177
- value: true
178
- eval_on_start:
179
- value: false
180
- eval_steps:
181
- value: 1000
182
- eval_strategy:
183
- value: steps
184
- eval_use_gather_object:
185
- value: false
186
- evaluation_strategy:
187
- value: steps
188
- exponential_decay_length_penalty:
189
- value: null
190
- finetuning_task:
191
- value: null
192
- forced_bos_token_id:
193
- value: null
194
- forced_decoder_ids:
195
- value: null
196
- forced_eos_token_id:
197
- value: null
198
- fp16:
199
- value: true
200
- fp16_backend:
201
- value: auto
202
- fp16_full_eval:
203
- value: false
204
- fp16_opt_level:
205
- value: O1
206
- fsdp:
207
- value: []
208
- fsdp_config:
209
- value:
210
- min_num_params: 0
211
- xla: false
212
- xla_fsdp_grad_ckpt: false
213
- xla_fsdp_v2: false
214
- fsdp_min_num_params:
215
- value: 0
216
- fsdp_transformer_layer_cls_to_wrap:
217
- value: null
218
- full_determinism:
219
- value: false
220
- generation_config:
221
- value: null
222
- generation_max_length:
223
- value: 225
224
- generation_num_beams:
225
- value: null
226
- gradient_accumulation_steps:
227
- value: 1
228
- gradient_checkpointing:
229
- value: true
230
- gradient_checkpointing_kwargs:
231
- value: null
232
- greater_is_better:
233
- value: false
234
- group_by_length:
235
- value: false
236
- half_precision_backend:
237
- value: auto
238
- hub_always_push:
239
- value: false
240
- hub_model_id:
241
- value: null
242
- hub_private_repo:
243
- value: null
244
- hub_strategy:
245
- value: every_save
246
- hub_token:
247
- value: <HUB_TOKEN>
248
- id2label:
249
- value:
250
- "0": LABEL_0
251
- "1": LABEL_1
252
- ignore_data_skip:
253
- value: false
254
- include_for_metrics:
255
- value: []
256
- include_inputs_for_metrics:
257
- value: false
258
- include_num_input_tokens_seen:
259
- value: false
260
- include_tokens_per_second:
261
- value: false
262
- init_std:
263
- value: 0.02
264
- is_decoder:
265
- value: false
266
- is_encoder_decoder:
267
- value: true
268
- jit_mode_eval:
269
- value: false
270
- label_names:
271
- value: null
272
- label_smoothing_factor:
273
- value: 0
274
- label2id:
275
- value:
276
- LABEL_0: 0
277
- LABEL_1: 1
278
- learning_rate:
279
- value: 1e-05
280
- length_column_name:
281
- value: input_length
282
- length_penalty:
283
- value: 1
284
- load_best_model_at_end:
285
- value: true
286
- local_rank:
287
- value: 0
288
- log_level:
289
- value: passive
290
- log_level_replica:
291
- value: warning
292
- log_on_each_node:
293
- value: true
294
- logging_dir:
295
- value: ./runs/Feb12_13-05-10_tknika
296
- logging_first_step:
297
- value: false
298
- logging_nan_inf_filter:
299
- value: true
300
- logging_steps:
301
- value: 25
302
- logging_strategy:
303
- value: steps
304
- lr_scheduler_type:
305
- value: linear
306
- mask_feature_length:
307
- value: 10
308
- mask_feature_min_masks:
309
- value: 0
310
- mask_feature_prob:
311
- value: 0
312
- mask_time_length:
313
- value: 10
314
- mask_time_min_masks:
315
- value: 2
316
- mask_time_prob:
317
- value: 0.05
318
- max_grad_norm:
319
- value: 1
320
- max_length:
321
- value: 448
322
- max_source_positions:
323
- value: 1500
324
- max_steps:
325
- value: 8000
326
- max_target_positions:
327
- value: 448
328
- median_filter_width:
329
- value: 7
330
- metric_for_best_model:
331
- value: wer
332
- min_length:
333
- value: 0
334
- model/num_parameters:
335
- value: 241734912
336
- model_type:
337
- value: whisper
338
- mp_parameters:
339
- value: ""
340
- neftune_noise_alpha:
341
- value: null
342
- no_cuda:
343
- value: false
344
- no_repeat_ngram_size:
345
- value: 0
346
- num_beam_groups:
347
- value: 1
348
- num_beams:
349
- value: 1
350
- num_hidden_layers:
351
- value: 12
352
- num_mel_bins:
353
- value: 80
354
- num_return_sequences:
355
- value: 1
356
- num_train_epochs:
357
- value: 3
358
- optim:
359
- value: adamw_torch
360
- optim_args:
361
- value: null
362
- optim_target_modules:
363
- value: null
364
- output_attentions:
365
- value: false
366
- output_dir:
367
- value: ./
368
- output_hidden_states:
369
- value: false
370
- output_scores:
371
- value: false
372
- overwrite_output_dir:
373
- value: true
374
- pad_token_id:
375
- value: 50257
376
- past_index:
377
- value: -1
378
- per_device_eval_batch_size:
379
- value: 16
380
- per_device_train_batch_size:
381
- value: 32
382
- per_gpu_eval_batch_size:
383
- value: null
384
- per_gpu_train_batch_size:
385
- value: null
386
- predict_with_generate:
387
- value: true
388
- prediction_loss_only:
389
- value: false
390
- prefix:
391
- value: null
392
- problem_type:
393
- value: null
394
- push_to_hub:
395
- value: true
396
- push_to_hub_model_id:
397
- value: null
398
- push_to_hub_organization:
399
- value: null
400
- push_to_hub_token:
401
- value: <PUSH_TO_HUB_TOKEN>
402
- ray_scope:
403
- value: last
404
- remove_invalid_values:
405
- value: false
406
- remove_unused_columns:
407
- value: true
408
- repetition_penalty:
409
- value: 1
410
- report_to:
411
- value:
412
- - wandb
413
- restore_callback_states_from_checkpoint:
414
- value: false
415
- resume_from_checkpoint:
416
- value: null
417
- return_dict:
418
- value: true
419
- return_dict_in_generate:
420
- value: false
421
- run_name:
422
- value: whisper-small-eu
423
- save_on_each_node:
424
- value: false
425
- save_only_model:
426
- value: false
427
- save_safetensors:
428
- value: true
429
- save_steps:
430
- value: 1000
431
- save_strategy:
432
- value: steps
433
- save_total_limit:
434
- value: null
435
- scale_embedding:
436
- value: false
437
- seed:
438
- value: 42
439
- sep_token_id:
440
- value: null
441
- skip_memory_metrics:
442
- value: true
443
- sortish_sampler:
444
- value: false
445
- split_batches:
446
- value: null
447
- suppress_tokens:
448
- value: null
449
- task_specific_params:
450
- value: null
451
- temperature:
452
- value: 1
453
- tf_legacy_loss:
454
- value: false
455
- tf32:
456
- value: null
457
- tie_encoder_decoder:
458
- value: false
459
- tie_word_embeddings:
460
- value: true
461
- tokenizer_class:
462
- value: null
463
- top_k:
464
- value: 50
465
- top_p:
466
- value: 1
467
- torch_compile:
468
- value: false
469
- torch_compile_backend:
470
- value: null
471
- torch_compile_mode:
472
- value: null
473
- torch_dtype:
474
- value: float32
475
- torch_empty_cache_steps:
476
- value: null
477
- torchdynamo:
478
- value: null
479
- torchscript:
480
- value: false
481
- tpu_metrics_debug:
482
- value: false
483
- tpu_num_cores:
484
- value: null
485
- transformers_version:
486
- value: 4.49.0.dev0
487
- typical_p:
488
- value: 1
489
- use_bfloat16:
490
- value: false
491
- use_cache:
492
- value: false
493
- use_cpu:
494
- value: false
495
- use_ipex:
496
- value: false
497
- use_legacy_prediction_loop:
498
- value: false
499
- use_liger_kernel:
500
- value: false
501
- use_mps_device:
502
- value: false
503
- use_weighted_layer_sum:
504
- value: false
505
- vocab_size:
506
- value: 51865
507
- warmup_ratio:
508
- value: 0
509
- warmup_steps:
510
- value: 500
511
- weight_decay:
512
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250212_130533-zeu6vay4/files/output.log DELETED
@@ -1,22 +0,0 @@
1
- 0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
2
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 633, in <module>
3
- main()
4
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 582, in main
5
- train_result = trainer.train(resume_from_checkpoint=checkpoint)
6
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
8
- return inner_training_loop(
9
- ^^^^^^^^^^^^^^^^^^^^
10
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
11
- self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
12
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
14
- return self.call_event("on_epoch_begin", args, state, control)
15
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
17
- result = getattr(callback, event)(
18
- ^^^^^^^^^^^^^^^^^^^^^^^^^
19
- File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
20
- if isinstance(train_dataloader.dataset, IterableDatasetShard):
21
- ^^^^^^^^^^^^^^^^^^^^^^^^
22
- AttributeError: 'NoneType' object has no attribute 'dataset'