Xabi Ezpeleta
committed on
Commit
·
c9036e7
1
Parent(s):
70fb6a7
Ignore wandb logs
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +1 -0
- wandb/debug-internal.log +0 -7
- wandb/debug.log +0 -25
- wandb/run-20250212_121751-d4i88lzt/files/config.yaml +0 -512
- wandb/run-20250212_121751-d4i88lzt/files/output.log +0 -22
- wandb/run-20250212_121751-d4i88lzt/files/requirements.txt +0 -115
- wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json +0 -85
- wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json +0 -1
- wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log +0 -14
- wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log +0 -15
- wandb/run-20250212_121751-d4i88lzt/logs/debug.log +0 -26
- wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb +0 -0
- wandb/run-20250212_122637-v3d3ouvn/files/config.yaml +0 -512
- wandb/run-20250212_122637-v3d3ouvn/files/output.log +0 -22
- wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt +0 -115
- wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json +0 -85
- wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json +0 -1
- wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log +0 -14
- wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log +0 -15
- wandb/run-20250212_122637-v3d3ouvn/logs/debug.log +0 -26
- wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb +0 -0
- wandb/run-20250212_122854-4m048f5s/files/config.yaml +0 -512
- wandb/run-20250212_122854-4m048f5s/files/output.log +0 -22
- wandb/run-20250212_122854-4m048f5s/files/requirements.txt +0 -115
- wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json +0 -85
- wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json +0 -1
- wandb/run-20250212_122854-4m048f5s/logs/debug-core.log +0 -14
- wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log +0 -15
- wandb/run-20250212_122854-4m048f5s/logs/debug.log +0 -26
- wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb +0 -0
- wandb/run-20250212_125202-c6xjc1gs/files/config.yaml +0 -512
- wandb/run-20250212_125202-c6xjc1gs/files/output.log +0 -22
- wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt +0 -115
- wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json +0 -85
- wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json +0 -1
- wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log +0 -14
- wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log +0 -15
- wandb/run-20250212_125202-c6xjc1gs/logs/debug.log +0 -26
- wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb +0 -0
- wandb/run-20250212_125924-xhsgsxqq/files/config.yaml +0 -512
- wandb/run-20250212_125924-xhsgsxqq/files/output.log +0 -22
- wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt +0 -115
- wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json +0 -85
- wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json +0 -1
- wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log +0 -14
- wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log +0 -15
- wandb/run-20250212_125924-xhsgsxqq/logs/debug.log +0 -26
- wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb +0 -0
- wandb/run-20250212_130533-zeu6vay4/files/config.yaml +0 -512
- wandb/run-20250212_130533-zeu6vay4/files/output.log +0 -22
.gitignore
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
checkpoint-*/
|
2 |
nohup.out
|
3 |
.venv/
|
|
|
|
1 |
checkpoint-*/
|
2 |
nohup.out
|
3 |
.venv/
|
4 |
+
wandb/
|
wandb/debug-internal.log
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
|
2 |
-
{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
|
3 |
-
{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
|
4 |
-
{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
|
5 |
-
{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
|
6 |
-
{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
|
7 |
-
{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/debug.log
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
2 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
|
3 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
4 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
5 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
6 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
|
7 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
|
8 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():756] calling init triggers
|
9 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
10 |
-
config: {'_wandb': {}}
|
11 |
-
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():789] starting backend
|
12 |
-
2025-02-12 15:27:10,107 INFO MainThread:243546 [wandb_init.py:init():793] sending inform_init request
|
13 |
-
2025-02-12 15:27:10,112 INFO MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
14 |
-
2025-02-12 15:27:10,113 INFO MainThread:243546 [wandb_init.py:init():808] backend started and connected
|
15 |
-
2025-02-12 15:27:10,115 INFO MainThread:243546 [wandb_init.py:init():901] updated telemetry
|
16 |
-
2025-02-12 15:27:10,122 INFO MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
17 |
-
2025-02-12 15:27:10,584 INFO MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
|
18 |
-
2025-02-12 15:27:10,691 INFO MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
|
19 |
-
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
20 |
-
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
21 |
-
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
|
22 |
-
2025-02-12 15:27:10,694 INFO MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
|
23 |
-
2025-02-12 15:27:10,698 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
24 |
-
2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
|
25 |
-
2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/files/config.yaml
DELETED
@@ -1,512 +0,0 @@
|
|
1 |
-
_attn_implementation_autoset:
|
2 |
-
value: true
|
3 |
-
_name_or_path:
|
4 |
-
value: openai/whisper-small
|
5 |
-
_wandb:
|
6 |
-
value:
|
7 |
-
cli_version: 0.19.6
|
8 |
-
m:
|
9 |
-
- "1": train/global_step
|
10 |
-
"6":
|
11 |
-
- 3
|
12 |
-
"7": []
|
13 |
-
python_version: 3.12.3
|
14 |
-
t:
|
15 |
-
"1":
|
16 |
-
- 1
|
17 |
-
- 5
|
18 |
-
- 11
|
19 |
-
- 49
|
20 |
-
- 51
|
21 |
-
- 53
|
22 |
-
- 55
|
23 |
-
- 71
|
24 |
-
- 100
|
25 |
-
"2":
|
26 |
-
- 1
|
27 |
-
- 5
|
28 |
-
- 11
|
29 |
-
- 49
|
30 |
-
- 51
|
31 |
-
- 53
|
32 |
-
- 55
|
33 |
-
- 71
|
34 |
-
- 100
|
35 |
-
"3":
|
36 |
-
- 7
|
37 |
-
- 13
|
38 |
-
- 19
|
39 |
-
- 23
|
40 |
-
- 55
|
41 |
-
- 66
|
42 |
-
"4": 3.12.3
|
43 |
-
"5": 0.19.6
|
44 |
-
"6": 4.49.0.dev0
|
45 |
-
"8":
|
46 |
-
- 5
|
47 |
-
"9":
|
48 |
-
"1": transformers_trainer
|
49 |
-
"12": 0.19.6
|
50 |
-
"13": linux-x86_64
|
51 |
-
accelerator_config:
|
52 |
-
value:
|
53 |
-
dispatch_batches: null
|
54 |
-
even_batches: true
|
55 |
-
gradient_accumulation_kwargs: null
|
56 |
-
non_blocking: false
|
57 |
-
split_batches: false
|
58 |
-
use_seedable_sampler: true
|
59 |
-
activation_dropout:
|
60 |
-
value: 0
|
61 |
-
activation_function:
|
62 |
-
value: gelu
|
63 |
-
adafactor:
|
64 |
-
value: false
|
65 |
-
adam_beta1:
|
66 |
-
value: 0.9
|
67 |
-
adam_beta2:
|
68 |
-
value: 0.999
|
69 |
-
adam_epsilon:
|
70 |
-
value: 1e-08
|
71 |
-
add_cross_attention:
|
72 |
-
value: false
|
73 |
-
apply_spec_augment:
|
74 |
-
value: false
|
75 |
-
architectures:
|
76 |
-
value:
|
77 |
-
- WhisperForConditionalGeneration
|
78 |
-
attention_dropout:
|
79 |
-
value: 0
|
80 |
-
auto_find_batch_size:
|
81 |
-
value: false
|
82 |
-
average_tokens_across_devices:
|
83 |
-
value: false
|
84 |
-
bad_words_ids:
|
85 |
-
value: null
|
86 |
-
batch_eval_metrics:
|
87 |
-
value: false
|
88 |
-
begin_suppress_tokens:
|
89 |
-
value:
|
90 |
-
- 220
|
91 |
-
- 50257
|
92 |
-
bf16:
|
93 |
-
value: false
|
94 |
-
bf16_full_eval:
|
95 |
-
value: false
|
96 |
-
bos_token_id:
|
97 |
-
value: 50257
|
98 |
-
chunk_size_feed_forward:
|
99 |
-
value: 0
|
100 |
-
classifier_proj_size:
|
101 |
-
value: 256
|
102 |
-
cross_attention_hidden_size:
|
103 |
-
value: null
|
104 |
-
d_model:
|
105 |
-
value: 768
|
106 |
-
data_seed:
|
107 |
-
value: null
|
108 |
-
dataloader_drop_last:
|
109 |
-
value: false
|
110 |
-
dataloader_num_workers:
|
111 |
-
value: 0
|
112 |
-
dataloader_persistent_workers:
|
113 |
-
value: false
|
114 |
-
dataloader_pin_memory:
|
115 |
-
value: true
|
116 |
-
dataloader_prefetch_factor:
|
117 |
-
value: null
|
118 |
-
ddp_backend:
|
119 |
-
value: null
|
120 |
-
ddp_broadcast_buffers:
|
121 |
-
value: null
|
122 |
-
ddp_bucket_cap_mb:
|
123 |
-
value: null
|
124 |
-
ddp_find_unused_parameters:
|
125 |
-
value: null
|
126 |
-
ddp_timeout:
|
127 |
-
value: 1800
|
128 |
-
debug:
|
129 |
-
value: []
|
130 |
-
decoder_attention_heads:
|
131 |
-
value: 12
|
132 |
-
decoder_ffn_dim:
|
133 |
-
value: 3072
|
134 |
-
decoder_layerdrop:
|
135 |
-
value: 0
|
136 |
-
decoder_layers:
|
137 |
-
value: 12
|
138 |
-
decoder_start_token_id:
|
139 |
-
value: 50258
|
140 |
-
deepspeed:
|
141 |
-
value: null
|
142 |
-
disable_tqdm:
|
143 |
-
value: false
|
144 |
-
dispatch_batches:
|
145 |
-
value: null
|
146 |
-
diversity_penalty:
|
147 |
-
value: 0
|
148 |
-
do_eval:
|
149 |
-
value: true
|
150 |
-
do_predict:
|
151 |
-
value: false
|
152 |
-
do_sample:
|
153 |
-
value: false
|
154 |
-
do_train:
|
155 |
-
value: true
|
156 |
-
dropout:
|
157 |
-
value: 0
|
158 |
-
early_stopping:
|
159 |
-
value: false
|
160 |
-
encoder_attention_heads:
|
161 |
-
value: 12
|
162 |
-
encoder_ffn_dim:
|
163 |
-
value: 3072
|
164 |
-
encoder_layerdrop:
|
165 |
-
value: 0
|
166 |
-
encoder_layers:
|
167 |
-
value: 12
|
168 |
-
encoder_no_repeat_ngram_size:
|
169 |
-
value: 0
|
170 |
-
eos_token_id:
|
171 |
-
value: 50257
|
172 |
-
eval_accumulation_steps:
|
173 |
-
value: null
|
174 |
-
eval_delay:
|
175 |
-
value: 0
|
176 |
-
eval_do_concat_batches:
|
177 |
-
value: true
|
178 |
-
eval_on_start:
|
179 |
-
value: false
|
180 |
-
eval_steps:
|
181 |
-
value: 1000
|
182 |
-
eval_strategy:
|
183 |
-
value: steps
|
184 |
-
eval_use_gather_object:
|
185 |
-
value: false
|
186 |
-
evaluation_strategy:
|
187 |
-
value: steps
|
188 |
-
exponential_decay_length_penalty:
|
189 |
-
value: null
|
190 |
-
finetuning_task:
|
191 |
-
value: null
|
192 |
-
forced_bos_token_id:
|
193 |
-
value: null
|
194 |
-
forced_decoder_ids:
|
195 |
-
value: null
|
196 |
-
forced_eos_token_id:
|
197 |
-
value: null
|
198 |
-
fp16:
|
199 |
-
value: true
|
200 |
-
fp16_backend:
|
201 |
-
value: auto
|
202 |
-
fp16_full_eval:
|
203 |
-
value: false
|
204 |
-
fp16_opt_level:
|
205 |
-
value: O1
|
206 |
-
fsdp:
|
207 |
-
value: []
|
208 |
-
fsdp_config:
|
209 |
-
value:
|
210 |
-
min_num_params: 0
|
211 |
-
xla: false
|
212 |
-
xla_fsdp_grad_ckpt: false
|
213 |
-
xla_fsdp_v2: false
|
214 |
-
fsdp_min_num_params:
|
215 |
-
value: 0
|
216 |
-
fsdp_transformer_layer_cls_to_wrap:
|
217 |
-
value: null
|
218 |
-
full_determinism:
|
219 |
-
value: false
|
220 |
-
generation_config:
|
221 |
-
value: null
|
222 |
-
generation_max_length:
|
223 |
-
value: 225
|
224 |
-
generation_num_beams:
|
225 |
-
value: null
|
226 |
-
gradient_accumulation_steps:
|
227 |
-
value: 1
|
228 |
-
gradient_checkpointing:
|
229 |
-
value: true
|
230 |
-
gradient_checkpointing_kwargs:
|
231 |
-
value: null
|
232 |
-
greater_is_better:
|
233 |
-
value: false
|
234 |
-
group_by_length:
|
235 |
-
value: false
|
236 |
-
half_precision_backend:
|
237 |
-
value: auto
|
238 |
-
hub_always_push:
|
239 |
-
value: false
|
240 |
-
hub_model_id:
|
241 |
-
value: null
|
242 |
-
hub_private_repo:
|
243 |
-
value: null
|
244 |
-
hub_strategy:
|
245 |
-
value: every_save
|
246 |
-
hub_token:
|
247 |
-
value: <HUB_TOKEN>
|
248 |
-
id2label:
|
249 |
-
value:
|
250 |
-
"0": LABEL_0
|
251 |
-
"1": LABEL_1
|
252 |
-
ignore_data_skip:
|
253 |
-
value: false
|
254 |
-
include_for_metrics:
|
255 |
-
value: []
|
256 |
-
include_inputs_for_metrics:
|
257 |
-
value: false
|
258 |
-
include_num_input_tokens_seen:
|
259 |
-
value: false
|
260 |
-
include_tokens_per_second:
|
261 |
-
value: false
|
262 |
-
init_std:
|
263 |
-
value: 0.02
|
264 |
-
is_decoder:
|
265 |
-
value: false
|
266 |
-
is_encoder_decoder:
|
267 |
-
value: true
|
268 |
-
jit_mode_eval:
|
269 |
-
value: false
|
270 |
-
label_names:
|
271 |
-
value: null
|
272 |
-
label_smoothing_factor:
|
273 |
-
value: 0
|
274 |
-
label2id:
|
275 |
-
value:
|
276 |
-
LABEL_0: 0
|
277 |
-
LABEL_1: 1
|
278 |
-
learning_rate:
|
279 |
-
value: 1e-05
|
280 |
-
length_column_name:
|
281 |
-
value: input_length
|
282 |
-
length_penalty:
|
283 |
-
value: 1
|
284 |
-
load_best_model_at_end:
|
285 |
-
value: true
|
286 |
-
local_rank:
|
287 |
-
value: 0
|
288 |
-
log_level:
|
289 |
-
value: passive
|
290 |
-
log_level_replica:
|
291 |
-
value: warning
|
292 |
-
log_on_each_node:
|
293 |
-
value: true
|
294 |
-
logging_dir:
|
295 |
-
value: ./runs/Feb12_12-17-27_tknika
|
296 |
-
logging_first_step:
|
297 |
-
value: false
|
298 |
-
logging_nan_inf_filter:
|
299 |
-
value: true
|
300 |
-
logging_steps:
|
301 |
-
value: 25
|
302 |
-
logging_strategy:
|
303 |
-
value: steps
|
304 |
-
lr_scheduler_type:
|
305 |
-
value: linear
|
306 |
-
mask_feature_length:
|
307 |
-
value: 10
|
308 |
-
mask_feature_min_masks:
|
309 |
-
value: 0
|
310 |
-
mask_feature_prob:
|
311 |
-
value: 0
|
312 |
-
mask_time_length:
|
313 |
-
value: 10
|
314 |
-
mask_time_min_masks:
|
315 |
-
value: 2
|
316 |
-
mask_time_prob:
|
317 |
-
value: 0.05
|
318 |
-
max_grad_norm:
|
319 |
-
value: 1
|
320 |
-
max_length:
|
321 |
-
value: 448
|
322 |
-
max_source_positions:
|
323 |
-
value: 1500
|
324 |
-
max_steps:
|
325 |
-
value: 8000
|
326 |
-
max_target_positions:
|
327 |
-
value: 448
|
328 |
-
median_filter_width:
|
329 |
-
value: 7
|
330 |
-
metric_for_best_model:
|
331 |
-
value: wer
|
332 |
-
min_length:
|
333 |
-
value: 0
|
334 |
-
model/num_parameters:
|
335 |
-
value: 241734912
|
336 |
-
model_type:
|
337 |
-
value: whisper
|
338 |
-
mp_parameters:
|
339 |
-
value: ""
|
340 |
-
neftune_noise_alpha:
|
341 |
-
value: null
|
342 |
-
no_cuda:
|
343 |
-
value: false
|
344 |
-
no_repeat_ngram_size:
|
345 |
-
value: 0
|
346 |
-
num_beam_groups:
|
347 |
-
value: 1
|
348 |
-
num_beams:
|
349 |
-
value: 1
|
350 |
-
num_hidden_layers:
|
351 |
-
value: 12
|
352 |
-
num_mel_bins:
|
353 |
-
value: 80
|
354 |
-
num_return_sequences:
|
355 |
-
value: 1
|
356 |
-
num_train_epochs:
|
357 |
-
value: 3
|
358 |
-
optim:
|
359 |
-
value: adamw_torch
|
360 |
-
optim_args:
|
361 |
-
value: null
|
362 |
-
optim_target_modules:
|
363 |
-
value: null
|
364 |
-
output_attentions:
|
365 |
-
value: false
|
366 |
-
output_dir:
|
367 |
-
value: ./
|
368 |
-
output_hidden_states:
|
369 |
-
value: false
|
370 |
-
output_scores:
|
371 |
-
value: false
|
372 |
-
overwrite_output_dir:
|
373 |
-
value: true
|
374 |
-
pad_token_id:
|
375 |
-
value: 50257
|
376 |
-
past_index:
|
377 |
-
value: -1
|
378 |
-
per_device_eval_batch_size:
|
379 |
-
value: 16
|
380 |
-
per_device_train_batch_size:
|
381 |
-
value: 32
|
382 |
-
per_gpu_eval_batch_size:
|
383 |
-
value: null
|
384 |
-
per_gpu_train_batch_size:
|
385 |
-
value: null
|
386 |
-
predict_with_generate:
|
387 |
-
value: true
|
388 |
-
prediction_loss_only:
|
389 |
-
value: false
|
390 |
-
prefix:
|
391 |
-
value: null
|
392 |
-
problem_type:
|
393 |
-
value: null
|
394 |
-
push_to_hub:
|
395 |
-
value: true
|
396 |
-
push_to_hub_model_id:
|
397 |
-
value: null
|
398 |
-
push_to_hub_organization:
|
399 |
-
value: null
|
400 |
-
push_to_hub_token:
|
401 |
-
value: <PUSH_TO_HUB_TOKEN>
|
402 |
-
ray_scope:
|
403 |
-
value: last
|
404 |
-
remove_invalid_values:
|
405 |
-
value: false
|
406 |
-
remove_unused_columns:
|
407 |
-
value: true
|
408 |
-
repetition_penalty:
|
409 |
-
value: 1
|
410 |
-
report_to:
|
411 |
-
value:
|
412 |
-
- wandb
|
413 |
-
restore_callback_states_from_checkpoint:
|
414 |
-
value: false
|
415 |
-
resume_from_checkpoint:
|
416 |
-
value: null
|
417 |
-
return_dict:
|
418 |
-
value: true
|
419 |
-
return_dict_in_generate:
|
420 |
-
value: false
|
421 |
-
run_name:
|
422 |
-
value: whisper-small-eu
|
423 |
-
save_on_each_node:
|
424 |
-
value: false
|
425 |
-
save_only_model:
|
426 |
-
value: false
|
427 |
-
save_safetensors:
|
428 |
-
value: true
|
429 |
-
save_steps:
|
430 |
-
value: 1000
|
431 |
-
save_strategy:
|
432 |
-
value: steps
|
433 |
-
save_total_limit:
|
434 |
-
value: null
|
435 |
-
scale_embedding:
|
436 |
-
value: false
|
437 |
-
seed:
|
438 |
-
value: 42
|
439 |
-
sep_token_id:
|
440 |
-
value: null
|
441 |
-
skip_memory_metrics:
|
442 |
-
value: true
|
443 |
-
sortish_sampler:
|
444 |
-
value: false
|
445 |
-
split_batches:
|
446 |
-
value: null
|
447 |
-
suppress_tokens:
|
448 |
-
value: null
|
449 |
-
task_specific_params:
|
450 |
-
value: null
|
451 |
-
temperature:
|
452 |
-
value: 1
|
453 |
-
tf_legacy_loss:
|
454 |
-
value: false
|
455 |
-
tf32:
|
456 |
-
value: null
|
457 |
-
tie_encoder_decoder:
|
458 |
-
value: false
|
459 |
-
tie_word_embeddings:
|
460 |
-
value: true
|
461 |
-
tokenizer_class:
|
462 |
-
value: null
|
463 |
-
top_k:
|
464 |
-
value: 50
|
465 |
-
top_p:
|
466 |
-
value: 1
|
467 |
-
torch_compile:
|
468 |
-
value: false
|
469 |
-
torch_compile_backend:
|
470 |
-
value: null
|
471 |
-
torch_compile_mode:
|
472 |
-
value: null
|
473 |
-
torch_dtype:
|
474 |
-
value: float32
|
475 |
-
torch_empty_cache_steps:
|
476 |
-
value: null
|
477 |
-
torchdynamo:
|
478 |
-
value: null
|
479 |
-
torchscript:
|
480 |
-
value: false
|
481 |
-
tpu_metrics_debug:
|
482 |
-
value: false
|
483 |
-
tpu_num_cores:
|
484 |
-
value: null
|
485 |
-
transformers_version:
|
486 |
-
value: 4.49.0.dev0
|
487 |
-
typical_p:
|
488 |
-
value: 1
|
489 |
-
use_bfloat16:
|
490 |
-
value: false
|
491 |
-
use_cache:
|
492 |
-
value: false
|
493 |
-
use_cpu:
|
494 |
-
value: false
|
495 |
-
use_ipex:
|
496 |
-
value: false
|
497 |
-
use_legacy_prediction_loop:
|
498 |
-
value: false
|
499 |
-
use_liger_kernel:
|
500 |
-
value: false
|
501 |
-
use_mps_device:
|
502 |
-
value: false
|
503 |
-
use_weighted_layer_sum:
|
504 |
-
value: false
|
505 |
-
vocab_size:
|
506 |
-
value: 51865
|
507 |
-
warmup_ratio:
|
508 |
-
value: 0
|
509 |
-
warmup_steps:
|
510 |
-
value: 500
|
511 |
-
weight_decay:
|
512 |
-
value: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/files/output.log
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
3 |
-
main()
|
4 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
5 |
-
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
-
return inner_training_loop(
|
9 |
-
^^^^^^^^^^^^^^^^^^^^
|
10 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
-
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
-
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
-
result = getattr(callback, event)(
|
18 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
20 |
-
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
-
AttributeError: 'NoneType' object has no attribute 'dataset'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/files/requirements.txt
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
aiosignal==1.3.2
|
2 |
-
Markdown==3.7
|
3 |
-
more-itertools==10.6.0
|
4 |
-
requests==2.32.3
|
5 |
-
sentry-sdk==2.21.0
|
6 |
-
torchaudio==2.6.0
|
7 |
-
charset-normalizer==3.4.1
|
8 |
-
docker-pycreds==0.4.0
|
9 |
-
nvidia-cusolver-cu12==11.6.1.9
|
10 |
-
PyYAML==6.0.2
|
11 |
-
librosa==0.10.2.post1
|
12 |
-
soxr==0.5.0.post1
|
13 |
-
multiprocess==0.70.16
|
14 |
-
setuptools==75.8.0
|
15 |
-
nvidia-cufft-cu12==11.2.1.3
|
16 |
-
joblib==1.4.2
|
17 |
-
pytz==2025.1
|
18 |
-
pip==24.0
|
19 |
-
scikit-learn==1.6.1
|
20 |
-
certifi==2025.1.31
|
21 |
-
jiwer==3.1.0
|
22 |
-
regex==2024.11.6
|
23 |
-
annotated-types==0.7.0
|
24 |
-
grpcio==1.70.0
|
25 |
-
msgpack==1.1.0
|
26 |
-
mpmath==1.3.0
|
27 |
-
nvidia-cudnn-cu12==9.1.0.70
|
28 |
-
soundfile==0.13.1
|
29 |
-
dill==0.3.8
|
30 |
-
nvidia-nvtx-cu12==12.4.127
|
31 |
-
six==1.17.0
|
32 |
-
nvidia-cuda-cupti-cu12==12.4.127
|
33 |
-
pyarrow==19.0.0
|
34 |
-
nvidia-nccl-cu12==2.21.5
|
35 |
-
psutil==6.1.1
|
36 |
-
decorator==5.1.1
|
37 |
-
llvmlite==0.44.0
|
38 |
-
frozenlist==1.5.0
|
39 |
-
pydantic==2.10.6
|
40 |
-
networkx==3.4.2
|
41 |
-
idna==3.10
|
42 |
-
wandb==0.19.6
|
43 |
-
aiohttp==3.11.12
|
44 |
-
RapidFuzz==3.12.1
|
45 |
-
pandas==2.2.3
|
46 |
-
python-dateutil==2.9.0.post0
|
47 |
-
numpy==2.1.3
|
48 |
-
tokenizers==0.21.0
|
49 |
-
nvidia-cusparselt-cu12==0.6.2
|
50 |
-
typing_extensions==4.12.2
|
51 |
-
urllib3==2.3.0
|
52 |
-
setproctitle==1.3.4
|
53 |
-
tzdata==2025.1
|
54 |
-
sympy==1.13.1
|
55 |
-
pooch==1.8.2
|
56 |
-
click==8.1.8
|
57 |
-
pydantic_core==2.27.2
|
58 |
-
MarkupSafe==3.0.2
|
59 |
-
scipy==1.15.1
|
60 |
-
accelerate==1.3.0
|
61 |
-
tensorboard==2.19.0
|
62 |
-
protobuf==5.29.3
|
63 |
-
gitdb==4.0.12
|
64 |
-
smmap==5.0.2
|
65 |
-
absl-py==2.1.0
|
66 |
-
tqdm==4.67.1
|
67 |
-
yarl==1.18.3
|
68 |
-
pycparser==2.22
|
69 |
-
nvidia-cusparse-cu12==12.3.1.170
|
70 |
-
attrs==25.1.0
|
71 |
-
lazy_loader==0.4
|
72 |
-
tensorboard-data-server==0.7.2
|
73 |
-
threadpoolctl==3.5.0
|
74 |
-
GitPython==3.1.44
|
75 |
-
safetensors==0.5.2
|
76 |
-
fsspec==2024.12.0
|
77 |
-
nvidia-cuda-nvrtc-cu12==12.4.127
|
78 |
-
filelock==3.17.0
|
79 |
-
aiohappyeyeballs==2.4.6
|
80 |
-
packaging==24.2
|
81 |
-
datasets==3.2.1.dev0
|
82 |
-
audioread==3.0.1
|
83 |
-
propcache==0.2.1
|
84 |
-
transformers==4.49.0.dev0
|
85 |
-
nvidia-cuda-runtime-cu12==12.4.127
|
86 |
-
cffi==1.17.1
|
87 |
-
evaluate==0.4.3
|
88 |
-
Werkzeug==3.1.3
|
89 |
-
huggingface-hub==0.28.1
|
90 |
-
Jinja2==3.1.5
|
91 |
-
torch==2.6.0
|
92 |
-
nvidia-curand-cu12==10.3.5.147
|
93 |
-
xxhash==3.5.0
|
94 |
-
platformdirs==4.3.6
|
95 |
-
multidict==6.1.0
|
96 |
-
nvidia-cublas-cu12==12.4.5.8
|
97 |
-
nvidia-nvjitlink-cu12==12.4.127
|
98 |
-
triton==3.2.0
|
99 |
-
numba==0.61.0
|
100 |
-
importlib_metadata==8.0.0
|
101 |
-
platformdirs==4.2.2
|
102 |
-
typeguard==4.3.0
|
103 |
-
more-itertools==10.3.0
|
104 |
-
tomli==2.0.1
|
105 |
-
autocommand==2.2.2
|
106 |
-
zipp==3.19.2
|
107 |
-
typing_extensions==4.12.2
|
108 |
-
backports.tarfile==1.2.0
|
109 |
-
inflect==7.3.1
|
110 |
-
jaraco.text==3.12.1
|
111 |
-
wheel==0.43.0
|
112 |
-
packaging==24.2
|
113 |
-
jaraco.collections==5.1.0
|
114 |
-
jaraco.functools==4.0.1
|
115 |
-
jaraco.context==5.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
3 |
-
"python": "CPython 3.12.3",
|
4 |
-
"startedAt": "2025-02-12T12:17:51.527114Z",
|
5 |
-
"args": [
|
6 |
-
"--model_name_or_path=openai/whisper-small",
|
7 |
-
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
8 |
-
"--language=basque",
|
9 |
-
"--train_split_name=train",
|
10 |
-
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
11 |
-
"--model_index_name=Whisper Small Basque",
|
12 |
-
"--max_steps=8000",
|
13 |
-
"--output_dir=./",
|
14 |
-
"--per_device_train_batch_size=32",
|
15 |
-
"--per_device_eval_batch_size=16",
|
16 |
-
"--gradient_accumulation_steps=1",
|
17 |
-
"--logging_steps=25",
|
18 |
-
"--learning_rate=1e-5",
|
19 |
-
"--warmup_steps=500",
|
20 |
-
"--evaluation_strategy=steps",
|
21 |
-
"--eval_steps=1000",
|
22 |
-
"--save_strategy=steps",
|
23 |
-
"--save_steps=1000",
|
24 |
-
"--generation_max_length=225",
|
25 |
-
"--length_column_name=input_length",
|
26 |
-
"--max_duration_in_seconds=30",
|
27 |
-
"--text_column_name=sentence",
|
28 |
-
"--freeze_feature_encoder=False",
|
29 |
-
"--report_to=tensorboard",
|
30 |
-
"--metric_for_best_model=wer",
|
31 |
-
"--greater_is_better=False",
|
32 |
-
"--load_best_model_at_end",
|
33 |
-
"--gradient_checkpointing",
|
34 |
-
"--fp16",
|
35 |
-
"--overwrite_output_dir",
|
36 |
-
"--do_train",
|
37 |
-
"--do_eval",
|
38 |
-
"--predict_with_generate",
|
39 |
-
"--do_normalize_eval",
|
40 |
-
"--streaming",
|
41 |
-
"--use_auth_token",
|
42 |
-
"--push_to_hub",
|
43 |
-
"--report_to",
|
44 |
-
"wandb",
|
45 |
-
"--run_name",
|
46 |
-
"whisper-small-eu"
|
47 |
-
],
|
48 |
-
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
49 |
-
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
50 |
-
"git": {
|
51 |
-
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
52 |
-
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
53 |
-
},
|
54 |
-
"email": "[email protected]",
|
55 |
-
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
56 |
-
"host": "tknika",
|
57 |
-
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
58 |
-
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
59 |
-
"cpu_count": 8,
|
60 |
-
"cpu_count_logical": 8,
|
61 |
-
"gpu": "NVIDIA L40-48Q",
|
62 |
-
"gpu_count": 1,
|
63 |
-
"disk": {
|
64 |
-
"/": {
|
65 |
-
"total": "525987168256",
|
66 |
-
"used": "297346564096"
|
67 |
-
}
|
68 |
-
},
|
69 |
-
"memory": {
|
70 |
-
"total": "33654022144"
|
71 |
-
},
|
72 |
-
"cpu": {
|
73 |
-
"count": 8,
|
74 |
-
"countLogical": 8
|
75 |
-
},
|
76 |
-
"gpu_nvidia": [
|
77 |
-
{
|
78 |
-
"name": "NVIDIA L40-48Q",
|
79 |
-
"memoryTotal": "51539607552",
|
80 |
-
"cudaCores": 18176,
|
81 |
-
"architecture": "Ada"
|
82 |
-
}
|
83 |
-
],
|
84 |
-
"cudaVersion": "12.4"
|
85 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"_wandb":{"runtime":0}}
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
2 |
-
{"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392}
|
3 |
-
{"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}}
|
4 |
-
{"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"}
|
5 |
-
{"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
|
6 |
-
{"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
|
7 |
-
{"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"}
|
8 |
-
{"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"}
|
9 |
-
{"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"}
|
10 |
-
{"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"}
|
11 |
-
{"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"}
|
12 |
-
{"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"}
|
13 |
-
{"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"}
|
14 |
-
{"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"}
|
2 |
-
{"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"}
|
3 |
-
{"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"}
|
4 |
-
{"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"}
|
5 |
-
{"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"}
|
6 |
-
{"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"}
|
7 |
-
{"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"}
|
8 |
-
{"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"}
|
9 |
-
{"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"}
|
10 |
-
{"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"}
|
11 |
-
{"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
12 |
-
{"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"}
|
13 |
-
{"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"}
|
14 |
-
{"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"}
|
15 |
-
{"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/logs/debug.log
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
2 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392
|
3 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
4 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
5 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
6 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
|
7 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
|
8 |
-
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:init():756] calling init triggers
|
9 |
-
2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
10 |
-
config: {'_wandb': {}}
|
11 |
-
2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():789] starting backend
|
12 |
-
2025-02-12 12:17:51,521 INFO MainThread:223392 [wandb_init.py:init():793] sending inform_init request
|
13 |
-
2025-02-12 12:17:51,526 INFO MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
14 |
-
2025-02-12 12:17:51,526 INFO MainThread:223392 [wandb_init.py:init():808] backend started and connected
|
15 |
-
2025-02-12 12:17:51,528 INFO MainThread:223392 [wandb_init.py:init():901] updated telemetry
|
16 |
-
2025-02-12 12:17:51,535 INFO MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
17 |
-
2025-02-12 12:17:51,944 INFO MainThread:223392 [wandb_init.py:init():994] starting run threads in backend
|
18 |
-
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg
|
19 |
-
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
20 |
-
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
21 |
-
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed.
|
22 |
-
2025-02-12 12:17:52,051 INFO MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process
|
23 |
-
2025-02-12 12:17:52,052 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
24 |
-
2025-02-12 12:17:52,054 INFO MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75ef87e92c00>>
|
25 |
-
2025-02-12 12:17:52,055 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
26 |
-
2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb
DELETED
Binary file (11.3 kB)
|
|
wandb/run-20250212_122637-v3d3ouvn/files/config.yaml
DELETED
@@ -1,512 +0,0 @@
|
|
1 |
-
_attn_implementation_autoset:
|
2 |
-
value: true
|
3 |
-
_name_or_path:
|
4 |
-
value: openai/whisper-small
|
5 |
-
_wandb:
|
6 |
-
value:
|
7 |
-
cli_version: 0.19.6
|
8 |
-
m:
|
9 |
-
- "1": train/global_step
|
10 |
-
"6":
|
11 |
-
- 3
|
12 |
-
"7": []
|
13 |
-
python_version: 3.12.3
|
14 |
-
t:
|
15 |
-
"1":
|
16 |
-
- 1
|
17 |
-
- 5
|
18 |
-
- 11
|
19 |
-
- 49
|
20 |
-
- 51
|
21 |
-
- 53
|
22 |
-
- 55
|
23 |
-
- 71
|
24 |
-
- 100
|
25 |
-
"2":
|
26 |
-
- 1
|
27 |
-
- 5
|
28 |
-
- 11
|
29 |
-
- 49
|
30 |
-
- 51
|
31 |
-
- 53
|
32 |
-
- 55
|
33 |
-
- 71
|
34 |
-
- 100
|
35 |
-
"3":
|
36 |
-
- 7
|
37 |
-
- 13
|
38 |
-
- 19
|
39 |
-
- 23
|
40 |
-
- 55
|
41 |
-
- 66
|
42 |
-
"4": 3.12.3
|
43 |
-
"5": 0.19.6
|
44 |
-
"6": 4.49.0.dev0
|
45 |
-
"8":
|
46 |
-
- 5
|
47 |
-
"9":
|
48 |
-
"1": transformers_trainer
|
49 |
-
"12": 0.19.6
|
50 |
-
"13": linux-x86_64
|
51 |
-
accelerator_config:
|
52 |
-
value:
|
53 |
-
dispatch_batches: null
|
54 |
-
even_batches: true
|
55 |
-
gradient_accumulation_kwargs: null
|
56 |
-
non_blocking: false
|
57 |
-
split_batches: false
|
58 |
-
use_seedable_sampler: true
|
59 |
-
activation_dropout:
|
60 |
-
value: 0
|
61 |
-
activation_function:
|
62 |
-
value: gelu
|
63 |
-
adafactor:
|
64 |
-
value: false
|
65 |
-
adam_beta1:
|
66 |
-
value: 0.9
|
67 |
-
adam_beta2:
|
68 |
-
value: 0.999
|
69 |
-
adam_epsilon:
|
70 |
-
value: 1e-08
|
71 |
-
add_cross_attention:
|
72 |
-
value: false
|
73 |
-
apply_spec_augment:
|
74 |
-
value: false
|
75 |
-
architectures:
|
76 |
-
value:
|
77 |
-
- WhisperForConditionalGeneration
|
78 |
-
attention_dropout:
|
79 |
-
value: 0
|
80 |
-
auto_find_batch_size:
|
81 |
-
value: false
|
82 |
-
average_tokens_across_devices:
|
83 |
-
value: false
|
84 |
-
bad_words_ids:
|
85 |
-
value: null
|
86 |
-
batch_eval_metrics:
|
87 |
-
value: false
|
88 |
-
begin_suppress_tokens:
|
89 |
-
value:
|
90 |
-
- 220
|
91 |
-
- 50257
|
92 |
-
bf16:
|
93 |
-
value: false
|
94 |
-
bf16_full_eval:
|
95 |
-
value: false
|
96 |
-
bos_token_id:
|
97 |
-
value: 50257
|
98 |
-
chunk_size_feed_forward:
|
99 |
-
value: 0
|
100 |
-
classifier_proj_size:
|
101 |
-
value: 256
|
102 |
-
cross_attention_hidden_size:
|
103 |
-
value: null
|
104 |
-
d_model:
|
105 |
-
value: 768
|
106 |
-
data_seed:
|
107 |
-
value: null
|
108 |
-
dataloader_drop_last:
|
109 |
-
value: false
|
110 |
-
dataloader_num_workers:
|
111 |
-
value: 0
|
112 |
-
dataloader_persistent_workers:
|
113 |
-
value: false
|
114 |
-
dataloader_pin_memory:
|
115 |
-
value: true
|
116 |
-
dataloader_prefetch_factor:
|
117 |
-
value: null
|
118 |
-
ddp_backend:
|
119 |
-
value: null
|
120 |
-
ddp_broadcast_buffers:
|
121 |
-
value: null
|
122 |
-
ddp_bucket_cap_mb:
|
123 |
-
value: null
|
124 |
-
ddp_find_unused_parameters:
|
125 |
-
value: null
|
126 |
-
ddp_timeout:
|
127 |
-
value: 1800
|
128 |
-
debug:
|
129 |
-
value: []
|
130 |
-
decoder_attention_heads:
|
131 |
-
value: 12
|
132 |
-
decoder_ffn_dim:
|
133 |
-
value: 3072
|
134 |
-
decoder_layerdrop:
|
135 |
-
value: 0
|
136 |
-
decoder_layers:
|
137 |
-
value: 12
|
138 |
-
decoder_start_token_id:
|
139 |
-
value: 50258
|
140 |
-
deepspeed:
|
141 |
-
value: null
|
142 |
-
disable_tqdm:
|
143 |
-
value: false
|
144 |
-
dispatch_batches:
|
145 |
-
value: null
|
146 |
-
diversity_penalty:
|
147 |
-
value: 0
|
148 |
-
do_eval:
|
149 |
-
value: true
|
150 |
-
do_predict:
|
151 |
-
value: false
|
152 |
-
do_sample:
|
153 |
-
value: false
|
154 |
-
do_train:
|
155 |
-
value: true
|
156 |
-
dropout:
|
157 |
-
value: 0
|
158 |
-
early_stopping:
|
159 |
-
value: false
|
160 |
-
encoder_attention_heads:
|
161 |
-
value: 12
|
162 |
-
encoder_ffn_dim:
|
163 |
-
value: 3072
|
164 |
-
encoder_layerdrop:
|
165 |
-
value: 0
|
166 |
-
encoder_layers:
|
167 |
-
value: 12
|
168 |
-
encoder_no_repeat_ngram_size:
|
169 |
-
value: 0
|
170 |
-
eos_token_id:
|
171 |
-
value: 50257
|
172 |
-
eval_accumulation_steps:
|
173 |
-
value: null
|
174 |
-
eval_delay:
|
175 |
-
value: 0
|
176 |
-
eval_do_concat_batches:
|
177 |
-
value: true
|
178 |
-
eval_on_start:
|
179 |
-
value: false
|
180 |
-
eval_steps:
|
181 |
-
value: 1000
|
182 |
-
eval_strategy:
|
183 |
-
value: steps
|
184 |
-
eval_use_gather_object:
|
185 |
-
value: false
|
186 |
-
evaluation_strategy:
|
187 |
-
value: steps
|
188 |
-
exponential_decay_length_penalty:
|
189 |
-
value: null
|
190 |
-
finetuning_task:
|
191 |
-
value: null
|
192 |
-
forced_bos_token_id:
|
193 |
-
value: null
|
194 |
-
forced_decoder_ids:
|
195 |
-
value: null
|
196 |
-
forced_eos_token_id:
|
197 |
-
value: null
|
198 |
-
fp16:
|
199 |
-
value: true
|
200 |
-
fp16_backend:
|
201 |
-
value: auto
|
202 |
-
fp16_full_eval:
|
203 |
-
value: false
|
204 |
-
fp16_opt_level:
|
205 |
-
value: O1
|
206 |
-
fsdp:
|
207 |
-
value: []
|
208 |
-
fsdp_config:
|
209 |
-
value:
|
210 |
-
min_num_params: 0
|
211 |
-
xla: false
|
212 |
-
xla_fsdp_grad_ckpt: false
|
213 |
-
xla_fsdp_v2: false
|
214 |
-
fsdp_min_num_params:
|
215 |
-
value: 0
|
216 |
-
fsdp_transformer_layer_cls_to_wrap:
|
217 |
-
value: null
|
218 |
-
full_determinism:
|
219 |
-
value: false
|
220 |
-
generation_config:
|
221 |
-
value: null
|
222 |
-
generation_max_length:
|
223 |
-
value: 225
|
224 |
-
generation_num_beams:
|
225 |
-
value: null
|
226 |
-
gradient_accumulation_steps:
|
227 |
-
value: 1
|
228 |
-
gradient_checkpointing:
|
229 |
-
value: true
|
230 |
-
gradient_checkpointing_kwargs:
|
231 |
-
value: null
|
232 |
-
greater_is_better:
|
233 |
-
value: false
|
234 |
-
group_by_length:
|
235 |
-
value: false
|
236 |
-
half_precision_backend:
|
237 |
-
value: auto
|
238 |
-
hub_always_push:
|
239 |
-
value: false
|
240 |
-
hub_model_id:
|
241 |
-
value: null
|
242 |
-
hub_private_repo:
|
243 |
-
value: null
|
244 |
-
hub_strategy:
|
245 |
-
value: every_save
|
246 |
-
hub_token:
|
247 |
-
value: <HUB_TOKEN>
|
248 |
-
id2label:
|
249 |
-
value:
|
250 |
-
"0": LABEL_0
|
251 |
-
"1": LABEL_1
|
252 |
-
ignore_data_skip:
|
253 |
-
value: false
|
254 |
-
include_for_metrics:
|
255 |
-
value: []
|
256 |
-
include_inputs_for_metrics:
|
257 |
-
value: false
|
258 |
-
include_num_input_tokens_seen:
|
259 |
-
value: false
|
260 |
-
include_tokens_per_second:
|
261 |
-
value: false
|
262 |
-
init_std:
|
263 |
-
value: 0.02
|
264 |
-
is_decoder:
|
265 |
-
value: false
|
266 |
-
is_encoder_decoder:
|
267 |
-
value: true
|
268 |
-
jit_mode_eval:
|
269 |
-
value: false
|
270 |
-
label_names:
|
271 |
-
value: null
|
272 |
-
label_smoothing_factor:
|
273 |
-
value: 0
|
274 |
-
label2id:
|
275 |
-
value:
|
276 |
-
LABEL_0: 0
|
277 |
-
LABEL_1: 1
|
278 |
-
learning_rate:
|
279 |
-
value: 1e-05
|
280 |
-
length_column_name:
|
281 |
-
value: input_length
|
282 |
-
length_penalty:
|
283 |
-
value: 1
|
284 |
-
load_best_model_at_end:
|
285 |
-
value: true
|
286 |
-
local_rank:
|
287 |
-
value: 0
|
288 |
-
log_level:
|
289 |
-
value: passive
|
290 |
-
log_level_replica:
|
291 |
-
value: warning
|
292 |
-
log_on_each_node:
|
293 |
-
value: true
|
294 |
-
logging_dir:
|
295 |
-
value: ./runs/Feb12_12-26-11_tknika
|
296 |
-
logging_first_step:
|
297 |
-
value: false
|
298 |
-
logging_nan_inf_filter:
|
299 |
-
value: true
|
300 |
-
logging_steps:
|
301 |
-
value: 25
|
302 |
-
logging_strategy:
|
303 |
-
value: steps
|
304 |
-
lr_scheduler_type:
|
305 |
-
value: linear
|
306 |
-
mask_feature_length:
|
307 |
-
value: 10
|
308 |
-
mask_feature_min_masks:
|
309 |
-
value: 0
|
310 |
-
mask_feature_prob:
|
311 |
-
value: 0
|
312 |
-
mask_time_length:
|
313 |
-
value: 10
|
314 |
-
mask_time_min_masks:
|
315 |
-
value: 2
|
316 |
-
mask_time_prob:
|
317 |
-
value: 0.05
|
318 |
-
max_grad_norm:
|
319 |
-
value: 1
|
320 |
-
max_length:
|
321 |
-
value: 448
|
322 |
-
max_source_positions:
|
323 |
-
value: 1500
|
324 |
-
max_steps:
|
325 |
-
value: 8000
|
326 |
-
max_target_positions:
|
327 |
-
value: 448
|
328 |
-
median_filter_width:
|
329 |
-
value: 7
|
330 |
-
metric_for_best_model:
|
331 |
-
value: wer
|
332 |
-
min_length:
|
333 |
-
value: 0
|
334 |
-
model/num_parameters:
|
335 |
-
value: 241734912
|
336 |
-
model_type:
|
337 |
-
value: whisper
|
338 |
-
mp_parameters:
|
339 |
-
value: ""
|
340 |
-
neftune_noise_alpha:
|
341 |
-
value: null
|
342 |
-
no_cuda:
|
343 |
-
value: false
|
344 |
-
no_repeat_ngram_size:
|
345 |
-
value: 0
|
346 |
-
num_beam_groups:
|
347 |
-
value: 1
|
348 |
-
num_beams:
|
349 |
-
value: 1
|
350 |
-
num_hidden_layers:
|
351 |
-
value: 12
|
352 |
-
num_mel_bins:
|
353 |
-
value: 80
|
354 |
-
num_return_sequences:
|
355 |
-
value: 1
|
356 |
-
num_train_epochs:
|
357 |
-
value: 3
|
358 |
-
optim:
|
359 |
-
value: adamw_torch
|
360 |
-
optim_args:
|
361 |
-
value: null
|
362 |
-
optim_target_modules:
|
363 |
-
value: null
|
364 |
-
output_attentions:
|
365 |
-
value: false
|
366 |
-
output_dir:
|
367 |
-
value: ./
|
368 |
-
output_hidden_states:
|
369 |
-
value: false
|
370 |
-
output_scores:
|
371 |
-
value: false
|
372 |
-
overwrite_output_dir:
|
373 |
-
value: true
|
374 |
-
pad_token_id:
|
375 |
-
value: 50257
|
376 |
-
past_index:
|
377 |
-
value: -1
|
378 |
-
per_device_eval_batch_size:
|
379 |
-
value: 16
|
380 |
-
per_device_train_batch_size:
|
381 |
-
value: 32
|
382 |
-
per_gpu_eval_batch_size:
|
383 |
-
value: null
|
384 |
-
per_gpu_train_batch_size:
|
385 |
-
value: null
|
386 |
-
predict_with_generate:
|
387 |
-
value: true
|
388 |
-
prediction_loss_only:
|
389 |
-
value: false
|
390 |
-
prefix:
|
391 |
-
value: null
|
392 |
-
problem_type:
|
393 |
-
value: null
|
394 |
-
push_to_hub:
|
395 |
-
value: true
|
396 |
-
push_to_hub_model_id:
|
397 |
-
value: null
|
398 |
-
push_to_hub_organization:
|
399 |
-
value: null
|
400 |
-
push_to_hub_token:
|
401 |
-
value: <PUSH_TO_HUB_TOKEN>
|
402 |
-
ray_scope:
|
403 |
-
value: last
|
404 |
-
remove_invalid_values:
|
405 |
-
value: false
|
406 |
-
remove_unused_columns:
|
407 |
-
value: true
|
408 |
-
repetition_penalty:
|
409 |
-
value: 1
|
410 |
-
report_to:
|
411 |
-
value:
|
412 |
-
- wandb
|
413 |
-
restore_callback_states_from_checkpoint:
|
414 |
-
value: false
|
415 |
-
resume_from_checkpoint:
|
416 |
-
value: null
|
417 |
-
return_dict:
|
418 |
-
value: true
|
419 |
-
return_dict_in_generate:
|
420 |
-
value: false
|
421 |
-
run_name:
|
422 |
-
value: whisper-small-eu
|
423 |
-
save_on_each_node:
|
424 |
-
value: false
|
425 |
-
save_only_model:
|
426 |
-
value: false
|
427 |
-
save_safetensors:
|
428 |
-
value: true
|
429 |
-
save_steps:
|
430 |
-
value: 1000
|
431 |
-
save_strategy:
|
432 |
-
value: steps
|
433 |
-
save_total_limit:
|
434 |
-
value: null
|
435 |
-
scale_embedding:
|
436 |
-
value: false
|
437 |
-
seed:
|
438 |
-
value: 42
|
439 |
-
sep_token_id:
|
440 |
-
value: null
|
441 |
-
skip_memory_metrics:
|
442 |
-
value: true
|
443 |
-
sortish_sampler:
|
444 |
-
value: false
|
445 |
-
split_batches:
|
446 |
-
value: null
|
447 |
-
suppress_tokens:
|
448 |
-
value: null
|
449 |
-
task_specific_params:
|
450 |
-
value: null
|
451 |
-
temperature:
|
452 |
-
value: 1
|
453 |
-
tf_legacy_loss:
|
454 |
-
value: false
|
455 |
-
tf32:
|
456 |
-
value: null
|
457 |
-
tie_encoder_decoder:
|
458 |
-
value: false
|
459 |
-
tie_word_embeddings:
|
460 |
-
value: true
|
461 |
-
tokenizer_class:
|
462 |
-
value: null
|
463 |
-
top_k:
|
464 |
-
value: 50
|
465 |
-
top_p:
|
466 |
-
value: 1
|
467 |
-
torch_compile:
|
468 |
-
value: false
|
469 |
-
torch_compile_backend:
|
470 |
-
value: null
|
471 |
-
torch_compile_mode:
|
472 |
-
value: null
|
473 |
-
torch_dtype:
|
474 |
-
value: float32
|
475 |
-
torch_empty_cache_steps:
|
476 |
-
value: null
|
477 |
-
torchdynamo:
|
478 |
-
value: null
|
479 |
-
torchscript:
|
480 |
-
value: false
|
481 |
-
tpu_metrics_debug:
|
482 |
-
value: false
|
483 |
-
tpu_num_cores:
|
484 |
-
value: null
|
485 |
-
transformers_version:
|
486 |
-
value: 4.49.0.dev0
|
487 |
-
typical_p:
|
488 |
-
value: 1
|
489 |
-
use_bfloat16:
|
490 |
-
value: false
|
491 |
-
use_cache:
|
492 |
-
value: false
|
493 |
-
use_cpu:
|
494 |
-
value: false
|
495 |
-
use_ipex:
|
496 |
-
value: false
|
497 |
-
use_legacy_prediction_loop:
|
498 |
-
value: false
|
499 |
-
use_liger_kernel:
|
500 |
-
value: false
|
501 |
-
use_mps_device:
|
502 |
-
value: false
|
503 |
-
use_weighted_layer_sum:
|
504 |
-
value: false
|
505 |
-
vocab_size:
|
506 |
-
value: 51865
|
507 |
-
warmup_ratio:
|
508 |
-
value: 0
|
509 |
-
warmup_steps:
|
510 |
-
value: 500
|
511 |
-
weight_decay:
|
512 |
-
value: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/files/output.log
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
3 |
-
main()
|
4 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
5 |
-
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
-
return inner_training_loop(
|
9 |
-
^^^^^^^^^^^^^^^^^^^^
|
10 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
-
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
-
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
-
result = getattr(callback, event)(
|
18 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
20 |
-
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
-
AttributeError: 'NoneType' object has no attribute 'dataset'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
aiosignal==1.3.2
|
2 |
-
Markdown==3.7
|
3 |
-
more-itertools==10.6.0
|
4 |
-
requests==2.32.3
|
5 |
-
sentry-sdk==2.21.0
|
6 |
-
torchaudio==2.6.0
|
7 |
-
charset-normalizer==3.4.1
|
8 |
-
docker-pycreds==0.4.0
|
9 |
-
nvidia-cusolver-cu12==11.6.1.9
|
10 |
-
PyYAML==6.0.2
|
11 |
-
librosa==0.10.2.post1
|
12 |
-
soxr==0.5.0.post1
|
13 |
-
multiprocess==0.70.16
|
14 |
-
setuptools==75.8.0
|
15 |
-
nvidia-cufft-cu12==11.2.1.3
|
16 |
-
joblib==1.4.2
|
17 |
-
pytz==2025.1
|
18 |
-
pip==24.0
|
19 |
-
scikit-learn==1.6.1
|
20 |
-
certifi==2025.1.31
|
21 |
-
jiwer==3.1.0
|
22 |
-
regex==2024.11.6
|
23 |
-
annotated-types==0.7.0
|
24 |
-
grpcio==1.70.0
|
25 |
-
msgpack==1.1.0
|
26 |
-
mpmath==1.3.0
|
27 |
-
nvidia-cudnn-cu12==9.1.0.70
|
28 |
-
soundfile==0.13.1
|
29 |
-
dill==0.3.8
|
30 |
-
nvidia-nvtx-cu12==12.4.127
|
31 |
-
six==1.17.0
|
32 |
-
nvidia-cuda-cupti-cu12==12.4.127
|
33 |
-
pyarrow==19.0.0
|
34 |
-
nvidia-nccl-cu12==2.21.5
|
35 |
-
psutil==6.1.1
|
36 |
-
decorator==5.1.1
|
37 |
-
llvmlite==0.44.0
|
38 |
-
frozenlist==1.5.0
|
39 |
-
pydantic==2.10.6
|
40 |
-
networkx==3.4.2
|
41 |
-
idna==3.10
|
42 |
-
wandb==0.19.6
|
43 |
-
aiohttp==3.11.12
|
44 |
-
RapidFuzz==3.12.1
|
45 |
-
pandas==2.2.3
|
46 |
-
python-dateutil==2.9.0.post0
|
47 |
-
numpy==2.1.3
|
48 |
-
tokenizers==0.21.0
|
49 |
-
nvidia-cusparselt-cu12==0.6.2
|
50 |
-
typing_extensions==4.12.2
|
51 |
-
urllib3==2.3.0
|
52 |
-
setproctitle==1.3.4
|
53 |
-
tzdata==2025.1
|
54 |
-
sympy==1.13.1
|
55 |
-
pooch==1.8.2
|
56 |
-
click==8.1.8
|
57 |
-
pydantic_core==2.27.2
|
58 |
-
MarkupSafe==3.0.2
|
59 |
-
scipy==1.15.1
|
60 |
-
accelerate==1.3.0
|
61 |
-
tensorboard==2.19.0
|
62 |
-
protobuf==5.29.3
|
63 |
-
gitdb==4.0.12
|
64 |
-
smmap==5.0.2
|
65 |
-
absl-py==2.1.0
|
66 |
-
tqdm==4.67.1
|
67 |
-
yarl==1.18.3
|
68 |
-
pycparser==2.22
|
69 |
-
nvidia-cusparse-cu12==12.3.1.170
|
70 |
-
attrs==25.1.0
|
71 |
-
lazy_loader==0.4
|
72 |
-
tensorboard-data-server==0.7.2
|
73 |
-
threadpoolctl==3.5.0
|
74 |
-
GitPython==3.1.44
|
75 |
-
safetensors==0.5.2
|
76 |
-
fsspec==2024.12.0
|
77 |
-
nvidia-cuda-nvrtc-cu12==12.4.127
|
78 |
-
filelock==3.17.0
|
79 |
-
aiohappyeyeballs==2.4.6
|
80 |
-
packaging==24.2
|
81 |
-
datasets==3.2.1.dev0
|
82 |
-
audioread==3.0.1
|
83 |
-
propcache==0.2.1
|
84 |
-
transformers==4.49.0.dev0
|
85 |
-
nvidia-cuda-runtime-cu12==12.4.127
|
86 |
-
cffi==1.17.1
|
87 |
-
evaluate==0.4.3
|
88 |
-
Werkzeug==3.1.3
|
89 |
-
huggingface-hub==0.28.1
|
90 |
-
Jinja2==3.1.5
|
91 |
-
torch==2.6.0
|
92 |
-
nvidia-curand-cu12==10.3.5.147
|
93 |
-
xxhash==3.5.0
|
94 |
-
platformdirs==4.3.6
|
95 |
-
multidict==6.1.0
|
96 |
-
nvidia-cublas-cu12==12.4.5.8
|
97 |
-
nvidia-nvjitlink-cu12==12.4.127
|
98 |
-
triton==3.2.0
|
99 |
-
numba==0.61.0
|
100 |
-
importlib_metadata==8.0.0
|
101 |
-
platformdirs==4.2.2
|
102 |
-
typeguard==4.3.0
|
103 |
-
more-itertools==10.3.0
|
104 |
-
tomli==2.0.1
|
105 |
-
autocommand==2.2.2
|
106 |
-
zipp==3.19.2
|
107 |
-
typing_extensions==4.12.2
|
108 |
-
backports.tarfile==1.2.0
|
109 |
-
inflect==7.3.1
|
110 |
-
jaraco.text==3.12.1
|
111 |
-
wheel==0.43.0
|
112 |
-
packaging==24.2
|
113 |
-
jaraco.collections==5.1.0
|
114 |
-
jaraco.functools==4.0.1
|
115 |
-
jaraco.context==5.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
3 |
-
"python": "CPython 3.12.3",
|
4 |
-
"startedAt": "2025-02-12T12:26:37.277902Z",
|
5 |
-
"args": [
|
6 |
-
"--model_name_or_path=openai/whisper-small",
|
7 |
-
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
8 |
-
"--language=basque",
|
9 |
-
"--train_split_name=train",
|
10 |
-
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
11 |
-
"--model_index_name=Whisper Small Basque",
|
12 |
-
"--max_steps=8000",
|
13 |
-
"--output_dir=./",
|
14 |
-
"--per_device_train_batch_size=32",
|
15 |
-
"--per_device_eval_batch_size=16",
|
16 |
-
"--gradient_accumulation_steps=1",
|
17 |
-
"--logging_steps=25",
|
18 |
-
"--learning_rate=1e-5",
|
19 |
-
"--warmup_steps=500",
|
20 |
-
"--evaluation_strategy=steps",
|
21 |
-
"--eval_steps=1000",
|
22 |
-
"--save_strategy=steps",
|
23 |
-
"--save_steps=1000",
|
24 |
-
"--generation_max_length=225",
|
25 |
-
"--length_column_name=input_length",
|
26 |
-
"--max_duration_in_seconds=30",
|
27 |
-
"--text_column_name=sentence",
|
28 |
-
"--freeze_feature_encoder=False",
|
29 |
-
"--report_to=tensorboard",
|
30 |
-
"--metric_for_best_model=wer",
|
31 |
-
"--greater_is_better=False",
|
32 |
-
"--load_best_model_at_end",
|
33 |
-
"--gradient_checkpointing",
|
34 |
-
"--fp16",
|
35 |
-
"--overwrite_output_dir",
|
36 |
-
"--do_train",
|
37 |
-
"--do_eval",
|
38 |
-
"--predict_with_generate",
|
39 |
-
"--do_normalize_eval",
|
40 |
-
"--streaming",
|
41 |
-
"--use_auth_token",
|
42 |
-
"--push_to_hub",
|
43 |
-
"--report_to",
|
44 |
-
"wandb",
|
45 |
-
"--run_name",
|
46 |
-
"whisper-small-eu"
|
47 |
-
],
|
48 |
-
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
49 |
-
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
50 |
-
"git": {
|
51 |
-
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
52 |
-
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
53 |
-
},
|
54 |
-
"email": "[email protected]",
|
55 |
-
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
56 |
-
"host": "tknika",
|
57 |
-
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
58 |
-
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
59 |
-
"cpu_count": 8,
|
60 |
-
"cpu_count_logical": 8,
|
61 |
-
"gpu": "NVIDIA L40-48Q",
|
62 |
-
"gpu_count": 1,
|
63 |
-
"disk": {
|
64 |
-
"/": {
|
65 |
-
"total": "525987168256",
|
66 |
-
"used": "297346666496"
|
67 |
-
}
|
68 |
-
},
|
69 |
-
"memory": {
|
70 |
-
"total": "33654022144"
|
71 |
-
},
|
72 |
-
"cpu": {
|
73 |
-
"count": 8,
|
74 |
-
"countLogical": 8
|
75 |
-
},
|
76 |
-
"gpu_nvidia": [
|
77 |
-
{
|
78 |
-
"name": "NVIDIA L40-48Q",
|
79 |
-
"memoryTotal": "51539607552",
|
80 |
-
"cudaCores": 18176,
|
81 |
-
"architecture": "Ada"
|
82 |
-
}
|
83 |
-
],
|
84 |
-
"cudaVersion": "12.4"
|
85 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"_wandb":{"runtime":0}}
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
2 |
-
{"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110}
|
3 |
-
{"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}}
|
4 |
-
{"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"}
|
5 |
-
{"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
|
6 |
-
{"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
|
7 |
-
{"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"}
|
8 |
-
{"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"}
|
9 |
-
{"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"}
|
10 |
-
{"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"}
|
11 |
-
{"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"}
|
12 |
-
{"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"}
|
13 |
-
{"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"}
|
14 |
-
{"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"}
|
2 |
-
{"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"}
|
3 |
-
{"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"}
|
4 |
-
{"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"}
|
5 |
-
{"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"}
|
6 |
-
{"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"}
|
7 |
-
{"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"}
|
8 |
-
{"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"}
|
9 |
-
{"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"}
|
10 |
-
{"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"}
|
11 |
-
{"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
12 |
-
{"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"}
|
13 |
-
{"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"}
|
14 |
-
{"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"}
|
15 |
-
{"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
2 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110
|
3 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
4 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
5 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
6 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
|
7 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
|
8 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():756] calling init triggers
|
9 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
10 |
-
config: {'_wandb': {}}
|
11 |
-
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():789] starting backend
|
12 |
-
2025-02-12 12:26:37,272 INFO MainThread:224110 [wandb_init.py:init():793] sending inform_init request
|
13 |
-
2025-02-12 12:26:37,277 INFO MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
14 |
-
2025-02-12 12:26:37,277 INFO MainThread:224110 [wandb_init.py:init():808] backend started and connected
|
15 |
-
2025-02-12 12:26:37,279 INFO MainThread:224110 [wandb_init.py:init():901] updated telemetry
|
16 |
-
2025-02-12 12:26:37,285 INFO MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
17 |
-
2025-02-12 12:26:37,653 INFO MainThread:224110 [wandb_init.py:init():994] starting run threads in backend
|
18 |
-
2025-02-12 12:26:37,764 INFO MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg
|
19 |
-
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
20 |
-
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
21 |
-
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed.
|
22 |
-
2025-02-12 12:26:37,766 INFO MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process
|
23 |
-
2025-02-12 12:26:37,767 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
24 |
-
2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7a5cbc15a330>>
|
25 |
-
2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
26 |
-
2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb
DELETED
Binary file (11.3 kB)
|
|
wandb/run-20250212_122854-4m048f5s/files/config.yaml
DELETED
@@ -1,512 +0,0 @@
|
|
1 |
-
_attn_implementation_autoset:
|
2 |
-
value: true
|
3 |
-
_name_or_path:
|
4 |
-
value: openai/whisper-small
|
5 |
-
_wandb:
|
6 |
-
value:
|
7 |
-
cli_version: 0.19.6
|
8 |
-
m:
|
9 |
-
- "1": train/global_step
|
10 |
-
"6":
|
11 |
-
- 3
|
12 |
-
"7": []
|
13 |
-
python_version: 3.12.3
|
14 |
-
t:
|
15 |
-
"1":
|
16 |
-
- 1
|
17 |
-
- 5
|
18 |
-
- 11
|
19 |
-
- 49
|
20 |
-
- 51
|
21 |
-
- 53
|
22 |
-
- 55
|
23 |
-
- 71
|
24 |
-
- 100
|
25 |
-
"2":
|
26 |
-
- 1
|
27 |
-
- 5
|
28 |
-
- 11
|
29 |
-
- 49
|
30 |
-
- 51
|
31 |
-
- 53
|
32 |
-
- 55
|
33 |
-
- 71
|
34 |
-
- 100
|
35 |
-
"3":
|
36 |
-
- 7
|
37 |
-
- 13
|
38 |
-
- 19
|
39 |
-
- 23
|
40 |
-
- 55
|
41 |
-
- 66
|
42 |
-
"4": 3.12.3
|
43 |
-
"5": 0.19.6
|
44 |
-
"6": 4.49.0.dev0
|
45 |
-
"8":
|
46 |
-
- 5
|
47 |
-
"9":
|
48 |
-
"1": transformers_trainer
|
49 |
-
"12": 0.19.6
|
50 |
-
"13": linux-x86_64
|
51 |
-
accelerator_config:
|
52 |
-
value:
|
53 |
-
dispatch_batches: null
|
54 |
-
even_batches: true
|
55 |
-
gradient_accumulation_kwargs: null
|
56 |
-
non_blocking: false
|
57 |
-
split_batches: false
|
58 |
-
use_seedable_sampler: true
|
59 |
-
activation_dropout:
|
60 |
-
value: 0
|
61 |
-
activation_function:
|
62 |
-
value: gelu
|
63 |
-
adafactor:
|
64 |
-
value: false
|
65 |
-
adam_beta1:
|
66 |
-
value: 0.9
|
67 |
-
adam_beta2:
|
68 |
-
value: 0.999
|
69 |
-
adam_epsilon:
|
70 |
-
value: 1e-08
|
71 |
-
add_cross_attention:
|
72 |
-
value: false
|
73 |
-
apply_spec_augment:
|
74 |
-
value: false
|
75 |
-
architectures:
|
76 |
-
value:
|
77 |
-
- WhisperForConditionalGeneration
|
78 |
-
attention_dropout:
|
79 |
-
value: 0
|
80 |
-
auto_find_batch_size:
|
81 |
-
value: false
|
82 |
-
average_tokens_across_devices:
|
83 |
-
value: false
|
84 |
-
bad_words_ids:
|
85 |
-
value: null
|
86 |
-
batch_eval_metrics:
|
87 |
-
value: false
|
88 |
-
begin_suppress_tokens:
|
89 |
-
value:
|
90 |
-
- 220
|
91 |
-
- 50257
|
92 |
-
bf16:
|
93 |
-
value: false
|
94 |
-
bf16_full_eval:
|
95 |
-
value: false
|
96 |
-
bos_token_id:
|
97 |
-
value: 50257
|
98 |
-
chunk_size_feed_forward:
|
99 |
-
value: 0
|
100 |
-
classifier_proj_size:
|
101 |
-
value: 256
|
102 |
-
cross_attention_hidden_size:
|
103 |
-
value: null
|
104 |
-
d_model:
|
105 |
-
value: 768
|
106 |
-
data_seed:
|
107 |
-
value: null
|
108 |
-
dataloader_drop_last:
|
109 |
-
value: false
|
110 |
-
dataloader_num_workers:
|
111 |
-
value: 0
|
112 |
-
dataloader_persistent_workers:
|
113 |
-
value: false
|
114 |
-
dataloader_pin_memory:
|
115 |
-
value: true
|
116 |
-
dataloader_prefetch_factor:
|
117 |
-
value: null
|
118 |
-
ddp_backend:
|
119 |
-
value: null
|
120 |
-
ddp_broadcast_buffers:
|
121 |
-
value: null
|
122 |
-
ddp_bucket_cap_mb:
|
123 |
-
value: null
|
124 |
-
ddp_find_unused_parameters:
|
125 |
-
value: null
|
126 |
-
ddp_timeout:
|
127 |
-
value: 1800
|
128 |
-
debug:
|
129 |
-
value: []
|
130 |
-
decoder_attention_heads:
|
131 |
-
value: 12
|
132 |
-
decoder_ffn_dim:
|
133 |
-
value: 3072
|
134 |
-
decoder_layerdrop:
|
135 |
-
value: 0
|
136 |
-
decoder_layers:
|
137 |
-
value: 12
|
138 |
-
decoder_start_token_id:
|
139 |
-
value: 50258
|
140 |
-
deepspeed:
|
141 |
-
value: null
|
142 |
-
disable_tqdm:
|
143 |
-
value: false
|
144 |
-
dispatch_batches:
|
145 |
-
value: null
|
146 |
-
diversity_penalty:
|
147 |
-
value: 0
|
148 |
-
do_eval:
|
149 |
-
value: true
|
150 |
-
do_predict:
|
151 |
-
value: false
|
152 |
-
do_sample:
|
153 |
-
value: false
|
154 |
-
do_train:
|
155 |
-
value: true
|
156 |
-
dropout:
|
157 |
-
value: 0
|
158 |
-
early_stopping:
|
159 |
-
value: false
|
160 |
-
encoder_attention_heads:
|
161 |
-
value: 12
|
162 |
-
encoder_ffn_dim:
|
163 |
-
value: 3072
|
164 |
-
encoder_layerdrop:
|
165 |
-
value: 0
|
166 |
-
encoder_layers:
|
167 |
-
value: 12
|
168 |
-
encoder_no_repeat_ngram_size:
|
169 |
-
value: 0
|
170 |
-
eos_token_id:
|
171 |
-
value: 50257
|
172 |
-
eval_accumulation_steps:
|
173 |
-
value: null
|
174 |
-
eval_delay:
|
175 |
-
value: 0
|
176 |
-
eval_do_concat_batches:
|
177 |
-
value: true
|
178 |
-
eval_on_start:
|
179 |
-
value: false
|
180 |
-
eval_steps:
|
181 |
-
value: 1000
|
182 |
-
eval_strategy:
|
183 |
-
value: steps
|
184 |
-
eval_use_gather_object:
|
185 |
-
value: false
|
186 |
-
evaluation_strategy:
|
187 |
-
value: steps
|
188 |
-
exponential_decay_length_penalty:
|
189 |
-
value: null
|
190 |
-
finetuning_task:
|
191 |
-
value: null
|
192 |
-
forced_bos_token_id:
|
193 |
-
value: null
|
194 |
-
forced_decoder_ids:
|
195 |
-
value: null
|
196 |
-
forced_eos_token_id:
|
197 |
-
value: null
|
198 |
-
fp16:
|
199 |
-
value: true
|
200 |
-
fp16_backend:
|
201 |
-
value: auto
|
202 |
-
fp16_full_eval:
|
203 |
-
value: false
|
204 |
-
fp16_opt_level:
|
205 |
-
value: O1
|
206 |
-
fsdp:
|
207 |
-
value: []
|
208 |
-
fsdp_config:
|
209 |
-
value:
|
210 |
-
min_num_params: 0
|
211 |
-
xla: false
|
212 |
-
xla_fsdp_grad_ckpt: false
|
213 |
-
xla_fsdp_v2: false
|
214 |
-
fsdp_min_num_params:
|
215 |
-
value: 0
|
216 |
-
fsdp_transformer_layer_cls_to_wrap:
|
217 |
-
value: null
|
218 |
-
full_determinism:
|
219 |
-
value: false
|
220 |
-
generation_config:
|
221 |
-
value: null
|
222 |
-
generation_max_length:
|
223 |
-
value: 225
|
224 |
-
generation_num_beams:
|
225 |
-
value: null
|
226 |
-
gradient_accumulation_steps:
|
227 |
-
value: 1
|
228 |
-
gradient_checkpointing:
|
229 |
-
value: true
|
230 |
-
gradient_checkpointing_kwargs:
|
231 |
-
value: null
|
232 |
-
greater_is_better:
|
233 |
-
value: false
|
234 |
-
group_by_length:
|
235 |
-
value: false
|
236 |
-
half_precision_backend:
|
237 |
-
value: auto
|
238 |
-
hub_always_push:
|
239 |
-
value: false
|
240 |
-
hub_model_id:
|
241 |
-
value: null
|
242 |
-
hub_private_repo:
|
243 |
-
value: null
|
244 |
-
hub_strategy:
|
245 |
-
value: every_save
|
246 |
-
hub_token:
|
247 |
-
value: <HUB_TOKEN>
|
248 |
-
id2label:
|
249 |
-
value:
|
250 |
-
"0": LABEL_0
|
251 |
-
"1": LABEL_1
|
252 |
-
ignore_data_skip:
|
253 |
-
value: false
|
254 |
-
include_for_metrics:
|
255 |
-
value: []
|
256 |
-
include_inputs_for_metrics:
|
257 |
-
value: false
|
258 |
-
include_num_input_tokens_seen:
|
259 |
-
value: false
|
260 |
-
include_tokens_per_second:
|
261 |
-
value: false
|
262 |
-
init_std:
|
263 |
-
value: 0.02
|
264 |
-
is_decoder:
|
265 |
-
value: false
|
266 |
-
is_encoder_decoder:
|
267 |
-
value: true
|
268 |
-
jit_mode_eval:
|
269 |
-
value: false
|
270 |
-
label_names:
|
271 |
-
value: null
|
272 |
-
label_smoothing_factor:
|
273 |
-
value: 0
|
274 |
-
label2id:
|
275 |
-
value:
|
276 |
-
LABEL_0: 0
|
277 |
-
LABEL_1: 1
|
278 |
-
learning_rate:
|
279 |
-
value: 1e-05
|
280 |
-
length_column_name:
|
281 |
-
value: input_length
|
282 |
-
length_penalty:
|
283 |
-
value: 1
|
284 |
-
load_best_model_at_end:
|
285 |
-
value: true
|
286 |
-
local_rank:
|
287 |
-
value: 0
|
288 |
-
log_level:
|
289 |
-
value: passive
|
290 |
-
log_level_replica:
|
291 |
-
value: warning
|
292 |
-
log_on_each_node:
|
293 |
-
value: true
|
294 |
-
logging_dir:
|
295 |
-
value: ./runs/Feb12_12-28-29_tknika
|
296 |
-
logging_first_step:
|
297 |
-
value: false
|
298 |
-
logging_nan_inf_filter:
|
299 |
-
value: true
|
300 |
-
logging_steps:
|
301 |
-
value: 25
|
302 |
-
logging_strategy:
|
303 |
-
value: steps
|
304 |
-
lr_scheduler_type:
|
305 |
-
value: linear
|
306 |
-
mask_feature_length:
|
307 |
-
value: 10
|
308 |
-
mask_feature_min_masks:
|
309 |
-
value: 0
|
310 |
-
mask_feature_prob:
|
311 |
-
value: 0
|
312 |
-
mask_time_length:
|
313 |
-
value: 10
|
314 |
-
mask_time_min_masks:
|
315 |
-
value: 2
|
316 |
-
mask_time_prob:
|
317 |
-
value: 0.05
|
318 |
-
max_grad_norm:
|
319 |
-
value: 1
|
320 |
-
max_length:
|
321 |
-
value: 448
|
322 |
-
max_source_positions:
|
323 |
-
value: 1500
|
324 |
-
max_steps:
|
325 |
-
value: 8000
|
326 |
-
max_target_positions:
|
327 |
-
value: 448
|
328 |
-
median_filter_width:
|
329 |
-
value: 7
|
330 |
-
metric_for_best_model:
|
331 |
-
value: wer
|
332 |
-
min_length:
|
333 |
-
value: 0
|
334 |
-
model/num_parameters:
|
335 |
-
value: 241734912
|
336 |
-
model_type:
|
337 |
-
value: whisper
|
338 |
-
mp_parameters:
|
339 |
-
value: ""
|
340 |
-
neftune_noise_alpha:
|
341 |
-
value: null
|
342 |
-
no_cuda:
|
343 |
-
value: false
|
344 |
-
no_repeat_ngram_size:
|
345 |
-
value: 0
|
346 |
-
num_beam_groups:
|
347 |
-
value: 1
|
348 |
-
num_beams:
|
349 |
-
value: 1
|
350 |
-
num_hidden_layers:
|
351 |
-
value: 12
|
352 |
-
num_mel_bins:
|
353 |
-
value: 80
|
354 |
-
num_return_sequences:
|
355 |
-
value: 1
|
356 |
-
num_train_epochs:
|
357 |
-
value: 3
|
358 |
-
optim:
|
359 |
-
value: adamw_torch
|
360 |
-
optim_args:
|
361 |
-
value: null
|
362 |
-
optim_target_modules:
|
363 |
-
value: null
|
364 |
-
output_attentions:
|
365 |
-
value: false
|
366 |
-
output_dir:
|
367 |
-
value: ./
|
368 |
-
output_hidden_states:
|
369 |
-
value: false
|
370 |
-
output_scores:
|
371 |
-
value: false
|
372 |
-
overwrite_output_dir:
|
373 |
-
value: true
|
374 |
-
pad_token_id:
|
375 |
-
value: 50257
|
376 |
-
past_index:
|
377 |
-
value: -1
|
378 |
-
per_device_eval_batch_size:
|
379 |
-
value: 16
|
380 |
-
per_device_train_batch_size:
|
381 |
-
value: 32
|
382 |
-
per_gpu_eval_batch_size:
|
383 |
-
value: null
|
384 |
-
per_gpu_train_batch_size:
|
385 |
-
value: null
|
386 |
-
predict_with_generate:
|
387 |
-
value: true
|
388 |
-
prediction_loss_only:
|
389 |
-
value: false
|
390 |
-
prefix:
|
391 |
-
value: null
|
392 |
-
problem_type:
|
393 |
-
value: null
|
394 |
-
push_to_hub:
|
395 |
-
value: true
|
396 |
-
push_to_hub_model_id:
|
397 |
-
value: null
|
398 |
-
push_to_hub_organization:
|
399 |
-
value: null
|
400 |
-
push_to_hub_token:
|
401 |
-
value: <PUSH_TO_HUB_TOKEN>
|
402 |
-
ray_scope:
|
403 |
-
value: last
|
404 |
-
remove_invalid_values:
|
405 |
-
value: false
|
406 |
-
remove_unused_columns:
|
407 |
-
value: true
|
408 |
-
repetition_penalty:
|
409 |
-
value: 1
|
410 |
-
report_to:
|
411 |
-
value:
|
412 |
-
- wandb
|
413 |
-
restore_callback_states_from_checkpoint:
|
414 |
-
value: false
|
415 |
-
resume_from_checkpoint:
|
416 |
-
value: null
|
417 |
-
return_dict:
|
418 |
-
value: true
|
419 |
-
return_dict_in_generate:
|
420 |
-
value: false
|
421 |
-
run_name:
|
422 |
-
value: whisper-small-eu
|
423 |
-
save_on_each_node:
|
424 |
-
value: false
|
425 |
-
save_only_model:
|
426 |
-
value: false
|
427 |
-
save_safetensors:
|
428 |
-
value: true
|
429 |
-
save_steps:
|
430 |
-
value: 1000
|
431 |
-
save_strategy:
|
432 |
-
value: steps
|
433 |
-
save_total_limit:
|
434 |
-
value: null
|
435 |
-
scale_embedding:
|
436 |
-
value: false
|
437 |
-
seed:
|
438 |
-
value: 42
|
439 |
-
sep_token_id:
|
440 |
-
value: null
|
441 |
-
skip_memory_metrics:
|
442 |
-
value: true
|
443 |
-
sortish_sampler:
|
444 |
-
value: false
|
445 |
-
split_batches:
|
446 |
-
value: null
|
447 |
-
suppress_tokens:
|
448 |
-
value: null
|
449 |
-
task_specific_params:
|
450 |
-
value: null
|
451 |
-
temperature:
|
452 |
-
value: 1
|
453 |
-
tf_legacy_loss:
|
454 |
-
value: false
|
455 |
-
tf32:
|
456 |
-
value: null
|
457 |
-
tie_encoder_decoder:
|
458 |
-
value: false
|
459 |
-
tie_word_embeddings:
|
460 |
-
value: true
|
461 |
-
tokenizer_class:
|
462 |
-
value: null
|
463 |
-
top_k:
|
464 |
-
value: 50
|
465 |
-
top_p:
|
466 |
-
value: 1
|
467 |
-
torch_compile:
|
468 |
-
value: false
|
469 |
-
torch_compile_backend:
|
470 |
-
value: null
|
471 |
-
torch_compile_mode:
|
472 |
-
value: null
|
473 |
-
torch_dtype:
|
474 |
-
value: float32
|
475 |
-
torch_empty_cache_steps:
|
476 |
-
value: null
|
477 |
-
torchdynamo:
|
478 |
-
value: null
|
479 |
-
torchscript:
|
480 |
-
value: false
|
481 |
-
tpu_metrics_debug:
|
482 |
-
value: false
|
483 |
-
tpu_num_cores:
|
484 |
-
value: null
|
485 |
-
transformers_version:
|
486 |
-
value: 4.49.0.dev0
|
487 |
-
typical_p:
|
488 |
-
value: 1
|
489 |
-
use_bfloat16:
|
490 |
-
value: false
|
491 |
-
use_cache:
|
492 |
-
value: false
|
493 |
-
use_cpu:
|
494 |
-
value: false
|
495 |
-
use_ipex:
|
496 |
-
value: false
|
497 |
-
use_legacy_prediction_loop:
|
498 |
-
value: false
|
499 |
-
use_liger_kernel:
|
500 |
-
value: false
|
501 |
-
use_mps_device:
|
502 |
-
value: false
|
503 |
-
use_weighted_layer_sum:
|
504 |
-
value: false
|
505 |
-
vocab_size:
|
506 |
-
value: 51865
|
507 |
-
warmup_ratio:
|
508 |
-
value: 0
|
509 |
-
warmup_steps:
|
510 |
-
value: 500
|
511 |
-
weight_decay:
|
512 |
-
value: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122854-4m048f5s/files/output.log
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
3 |
-
main()
|
4 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
5 |
-
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
-
return inner_training_loop(
|
9 |
-
^^^^^^^^^^^^^^^^^^^^
|
10 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
-
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
-
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
-
result = getattr(callback, event)(
|
18 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
20 |
-
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
-
AttributeError: 'NoneType' object has no attribute 'dataset'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122854-4m048f5s/files/requirements.txt
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
aiosignal==1.3.2
|
2 |
-
Markdown==3.7
|
3 |
-
more-itertools==10.6.0
|
4 |
-
requests==2.32.3
|
5 |
-
sentry-sdk==2.21.0
|
6 |
-
torchaudio==2.6.0
|
7 |
-
charset-normalizer==3.4.1
|
8 |
-
docker-pycreds==0.4.0
|
9 |
-
nvidia-cusolver-cu12==11.6.1.9
|
10 |
-
PyYAML==6.0.2
|
11 |
-
librosa==0.10.2.post1
|
12 |
-
soxr==0.5.0.post1
|
13 |
-
multiprocess==0.70.16
|
14 |
-
setuptools==75.8.0
|
15 |
-
nvidia-cufft-cu12==11.2.1.3
|
16 |
-
joblib==1.4.2
|
17 |
-
pytz==2025.1
|
18 |
-
pip==24.0
|
19 |
-
scikit-learn==1.6.1
|
20 |
-
certifi==2025.1.31
|
21 |
-
jiwer==3.1.0
|
22 |
-
regex==2024.11.6
|
23 |
-
annotated-types==0.7.0
|
24 |
-
grpcio==1.70.0
|
25 |
-
msgpack==1.1.0
|
26 |
-
mpmath==1.3.0
|
27 |
-
nvidia-cudnn-cu12==9.1.0.70
|
28 |
-
soundfile==0.13.1
|
29 |
-
dill==0.3.8
|
30 |
-
nvidia-nvtx-cu12==12.4.127
|
31 |
-
six==1.17.0
|
32 |
-
nvidia-cuda-cupti-cu12==12.4.127
|
33 |
-
pyarrow==19.0.0
|
34 |
-
nvidia-nccl-cu12==2.21.5
|
35 |
-
psutil==6.1.1
|
36 |
-
decorator==5.1.1
|
37 |
-
llvmlite==0.44.0
|
38 |
-
frozenlist==1.5.0
|
39 |
-
pydantic==2.10.6
|
40 |
-
networkx==3.4.2
|
41 |
-
idna==3.10
|
42 |
-
wandb==0.19.6
|
43 |
-
aiohttp==3.11.12
|
44 |
-
RapidFuzz==3.12.1
|
45 |
-
pandas==2.2.3
|
46 |
-
python-dateutil==2.9.0.post0
|
47 |
-
numpy==2.1.3
|
48 |
-
tokenizers==0.21.0
|
49 |
-
nvidia-cusparselt-cu12==0.6.2
|
50 |
-
typing_extensions==4.12.2
|
51 |
-
urllib3==2.3.0
|
52 |
-
setproctitle==1.3.4
|
53 |
-
tzdata==2025.1
|
54 |
-
sympy==1.13.1
|
55 |
-
pooch==1.8.2
|
56 |
-
click==8.1.8
|
57 |
-
pydantic_core==2.27.2
|
58 |
-
MarkupSafe==3.0.2
|
59 |
-
scipy==1.15.1
|
60 |
-
accelerate==1.3.0
|
61 |
-
tensorboard==2.19.0
|
62 |
-
protobuf==5.29.3
|
63 |
-
gitdb==4.0.12
|
64 |
-
smmap==5.0.2
|
65 |
-
absl-py==2.1.0
|
66 |
-
tqdm==4.67.1
|
67 |
-
yarl==1.18.3
|
68 |
-
pycparser==2.22
|
69 |
-
nvidia-cusparse-cu12==12.3.1.170
|
70 |
-
attrs==25.1.0
|
71 |
-
lazy_loader==0.4
|
72 |
-
tensorboard-data-server==0.7.2
|
73 |
-
threadpoolctl==3.5.0
|
74 |
-
GitPython==3.1.44
|
75 |
-
safetensors==0.5.2
|
76 |
-
fsspec==2024.12.0
|
77 |
-
nvidia-cuda-nvrtc-cu12==12.4.127
|
78 |
-
filelock==3.17.0
|
79 |
-
aiohappyeyeballs==2.4.6
|
80 |
-
packaging==24.2
|
81 |
-
datasets==3.2.1.dev0
|
82 |
-
audioread==3.0.1
|
83 |
-
propcache==0.2.1
|
84 |
-
transformers==4.49.0.dev0
|
85 |
-
nvidia-cuda-runtime-cu12==12.4.127
|
86 |
-
cffi==1.17.1
|
87 |
-
evaluate==0.4.3
|
88 |
-
Werkzeug==3.1.3
|
89 |
-
huggingface-hub==0.28.1
|
90 |
-
Jinja2==3.1.5
|
91 |
-
torch==2.6.0
|
92 |
-
nvidia-curand-cu12==10.3.5.147
|
93 |
-
xxhash==3.5.0
|
94 |
-
platformdirs==4.3.6
|
95 |
-
multidict==6.1.0
|
96 |
-
nvidia-cublas-cu12==12.4.5.8
|
97 |
-
nvidia-nvjitlink-cu12==12.4.127
|
98 |
-
triton==3.2.0
|
99 |
-
numba==0.61.0
|
100 |
-
importlib_metadata==8.0.0
|
101 |
-
platformdirs==4.2.2
|
102 |
-
typeguard==4.3.0
|
103 |
-
more-itertools==10.3.0
|
104 |
-
tomli==2.0.1
|
105 |
-
autocommand==2.2.2
|
106 |
-
zipp==3.19.2
|
107 |
-
typing_extensions==4.12.2
|
108 |
-
backports.tarfile==1.2.0
|
109 |
-
inflect==7.3.1
|
110 |
-
jaraco.text==3.12.1
|
111 |
-
wheel==0.43.0
|
112 |
-
packaging==24.2
|
113 |
-
jaraco.collections==5.1.0
|
114 |
-
jaraco.functools==4.0.1
|
115 |
-
jaraco.context==5.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
3 |
-
"python": "CPython 3.12.3",
|
4 |
-
"startedAt": "2025-02-12T12:28:54.528397Z",
|
5 |
-
"args": [
|
6 |
-
"--model_name_or_path=openai/whisper-small",
|
7 |
-
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
8 |
-
"--language=basque",
|
9 |
-
"--train_split_name=train",
|
10 |
-
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
11 |
-
"--model_index_name=Whisper Small Basque",
|
12 |
-
"--max_steps=8000",
|
13 |
-
"--output_dir=./",
|
14 |
-
"--per_device_train_batch_size=32",
|
15 |
-
"--per_device_eval_batch_size=16",
|
16 |
-
"--gradient_accumulation_steps=1",
|
17 |
-
"--logging_steps=25",
|
18 |
-
"--learning_rate=1e-5",
|
19 |
-
"--warmup_steps=500",
|
20 |
-
"--evaluation_strategy=steps",
|
21 |
-
"--eval_steps=1000",
|
22 |
-
"--save_strategy=steps",
|
23 |
-
"--save_steps=1000",
|
24 |
-
"--generation_max_length=225",
|
25 |
-
"--length_column_name=input_length",
|
26 |
-
"--max_duration_in_seconds=30",
|
27 |
-
"--text_column_name=sentence",
|
28 |
-
"--freeze_feature_encoder=False",
|
29 |
-
"--report_to=tensorboard",
|
30 |
-
"--metric_for_best_model=wer",
|
31 |
-
"--greater_is_better=False",
|
32 |
-
"--load_best_model_at_end",
|
33 |
-
"--gradient_checkpointing",
|
34 |
-
"--fp16",
|
35 |
-
"--overwrite_output_dir",
|
36 |
-
"--do_train",
|
37 |
-
"--do_eval",
|
38 |
-
"--predict_with_generate",
|
39 |
-
"--do_normalize_eval",
|
40 |
-
"--streaming",
|
41 |
-
"--use_auth_token",
|
42 |
-
"--push_to_hub",
|
43 |
-
"--report_to",
|
44 |
-
"wandb",
|
45 |
-
"--run_name",
|
46 |
-
"whisper-small-eu"
|
47 |
-
],
|
48 |
-
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
49 |
-
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
50 |
-
"git": {
|
51 |
-
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
52 |
-
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
53 |
-
},
|
54 |
-
"email": "[email protected]",
|
55 |
-
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
56 |
-
"host": "tknika",
|
57 |
-
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
58 |
-
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
59 |
-
"cpu_count": 8,
|
60 |
-
"cpu_count_logical": 8,
|
61 |
-
"gpu": "NVIDIA L40-48Q",
|
62 |
-
"gpu_count": 1,
|
63 |
-
"disk": {
|
64 |
-
"/": {
|
65 |
-
"total": "525987168256",
|
66 |
-
"used": "297346756608"
|
67 |
-
}
|
68 |
-
},
|
69 |
-
"memory": {
|
70 |
-
"total": "33654022144"
|
71 |
-
},
|
72 |
-
"cpu": {
|
73 |
-
"count": 8,
|
74 |
-
"countLogical": 8
|
75 |
-
},
|
76 |
-
"gpu_nvidia": [
|
77 |
-
{
|
78 |
-
"name": "NVIDIA L40-48Q",
|
79 |
-
"memoryTotal": "51539607552",
|
80 |
-
"cudaCores": 18176,
|
81 |
-
"architecture": "Ada"
|
82 |
-
}
|
83 |
-
],
|
84 |
-
"cudaVersion": "12.4"
|
85 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"_wandb":{"runtime":0}}
|
|
|
|
wandb/run-20250212_122854-4m048f5s/logs/debug-core.log
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
2 |
-
{"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528}
|
3 |
-
{"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}}
|
4 |
-
{"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"}
|
5 |
-
{"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"}
|
6 |
-
{"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"}
|
7 |
-
{"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"}
|
8 |
-
{"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"}
|
9 |
-
{"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"}
|
10 |
-
{"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"}
|
11 |
-
{"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"}
|
12 |
-
{"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"}
|
13 |
-
{"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"}
|
14 |
-
{"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"}
|
2 |
-
{"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"}
|
3 |
-
{"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"}
|
4 |
-
{"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"}
|
5 |
-
{"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"}
|
6 |
-
{"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"}
|
7 |
-
{"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"}
|
8 |
-
{"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"}
|
9 |
-
{"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"}
|
10 |
-
{"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"}
|
11 |
-
{"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
12 |
-
{"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"}
|
13 |
-
{"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"}
|
14 |
-
{"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"}
|
15 |
-
{"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122854-4m048f5s/logs/debug.log
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
2 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528
|
3 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
4 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
5 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
6 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log
|
7 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
|
8 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():756] calling init triggers
|
9 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
10 |
-
config: {'_wandb': {}}
|
11 |
-
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():789] starting backend
|
12 |
-
2025-02-12 12:28:54,521 INFO MainThread:224528 [wandb_init.py:init():793] sending inform_init request
|
13 |
-
2025-02-12 12:28:54,527 INFO MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
14 |
-
2025-02-12 12:28:54,528 INFO MainThread:224528 [wandb_init.py:init():808] backend started and connected
|
15 |
-
2025-02-12 12:28:54,530 INFO MainThread:224528 [wandb_init.py:init():901] updated telemetry
|
16 |
-
2025-02-12 12:28:54,537 INFO MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
17 |
-
2025-02-12 12:28:54,883 INFO MainThread:224528 [wandb_init.py:init():994] starting run threads in backend
|
18 |
-
2025-02-12 12:28:54,988 INFO MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg
|
19 |
-
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
20 |
-
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
21 |
-
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed.
|
22 |
-
2025-02-12 12:28:54,990 INFO MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process
|
23 |
-
2025-02-12 12:28:54,991 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
24 |
-
2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x71c5f6c57cb0>>
|
25 |
-
2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
26 |
-
2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb
DELETED
Binary file (11.3 kB)
|
|
wandb/run-20250212_125202-c6xjc1gs/files/config.yaml
DELETED
@@ -1,512 +0,0 @@
|
|
1 |
-
_attn_implementation_autoset:
|
2 |
-
value: true
|
3 |
-
_name_or_path:
|
4 |
-
value: openai/whisper-small
|
5 |
-
_wandb:
|
6 |
-
value:
|
7 |
-
cli_version: 0.19.6
|
8 |
-
m:
|
9 |
-
- "1": train/global_step
|
10 |
-
"6":
|
11 |
-
- 3
|
12 |
-
"7": []
|
13 |
-
python_version: 3.12.3
|
14 |
-
t:
|
15 |
-
"1":
|
16 |
-
- 1
|
17 |
-
- 5
|
18 |
-
- 11
|
19 |
-
- 49
|
20 |
-
- 51
|
21 |
-
- 53
|
22 |
-
- 55
|
23 |
-
- 71
|
24 |
-
- 100
|
25 |
-
"2":
|
26 |
-
- 1
|
27 |
-
- 5
|
28 |
-
- 11
|
29 |
-
- 49
|
30 |
-
- 51
|
31 |
-
- 53
|
32 |
-
- 55
|
33 |
-
- 71
|
34 |
-
- 100
|
35 |
-
"3":
|
36 |
-
- 7
|
37 |
-
- 13
|
38 |
-
- 19
|
39 |
-
- 23
|
40 |
-
- 55
|
41 |
-
- 66
|
42 |
-
"4": 3.12.3
|
43 |
-
"5": 0.19.6
|
44 |
-
"6": 4.49.0.dev0
|
45 |
-
"8":
|
46 |
-
- 5
|
47 |
-
"9":
|
48 |
-
"1": transformers_trainer
|
49 |
-
"12": 0.19.6
|
50 |
-
"13": linux-x86_64
|
51 |
-
accelerator_config:
|
52 |
-
value:
|
53 |
-
dispatch_batches: null
|
54 |
-
even_batches: true
|
55 |
-
gradient_accumulation_kwargs: null
|
56 |
-
non_blocking: false
|
57 |
-
split_batches: false
|
58 |
-
use_seedable_sampler: true
|
59 |
-
activation_dropout:
|
60 |
-
value: 0
|
61 |
-
activation_function:
|
62 |
-
value: gelu
|
63 |
-
adafactor:
|
64 |
-
value: false
|
65 |
-
adam_beta1:
|
66 |
-
value: 0.9
|
67 |
-
adam_beta2:
|
68 |
-
value: 0.999
|
69 |
-
adam_epsilon:
|
70 |
-
value: 1e-08
|
71 |
-
add_cross_attention:
|
72 |
-
value: false
|
73 |
-
apply_spec_augment:
|
74 |
-
value: false
|
75 |
-
architectures:
|
76 |
-
value:
|
77 |
-
- WhisperForConditionalGeneration
|
78 |
-
attention_dropout:
|
79 |
-
value: 0
|
80 |
-
auto_find_batch_size:
|
81 |
-
value: false
|
82 |
-
average_tokens_across_devices:
|
83 |
-
value: false
|
84 |
-
bad_words_ids:
|
85 |
-
value: null
|
86 |
-
batch_eval_metrics:
|
87 |
-
value: false
|
88 |
-
begin_suppress_tokens:
|
89 |
-
value:
|
90 |
-
- 220
|
91 |
-
- 50257
|
92 |
-
bf16:
|
93 |
-
value: false
|
94 |
-
bf16_full_eval:
|
95 |
-
value: false
|
96 |
-
bos_token_id:
|
97 |
-
value: 50257
|
98 |
-
chunk_size_feed_forward:
|
99 |
-
value: 0
|
100 |
-
classifier_proj_size:
|
101 |
-
value: 256
|
102 |
-
cross_attention_hidden_size:
|
103 |
-
value: null
|
104 |
-
d_model:
|
105 |
-
value: 768
|
106 |
-
data_seed:
|
107 |
-
value: null
|
108 |
-
dataloader_drop_last:
|
109 |
-
value: false
|
110 |
-
dataloader_num_workers:
|
111 |
-
value: 0
|
112 |
-
dataloader_persistent_workers:
|
113 |
-
value: false
|
114 |
-
dataloader_pin_memory:
|
115 |
-
value: true
|
116 |
-
dataloader_prefetch_factor:
|
117 |
-
value: null
|
118 |
-
ddp_backend:
|
119 |
-
value: null
|
120 |
-
ddp_broadcast_buffers:
|
121 |
-
value: null
|
122 |
-
ddp_bucket_cap_mb:
|
123 |
-
value: null
|
124 |
-
ddp_find_unused_parameters:
|
125 |
-
value: null
|
126 |
-
ddp_timeout:
|
127 |
-
value: 1800
|
128 |
-
debug:
|
129 |
-
value: []
|
130 |
-
decoder_attention_heads:
|
131 |
-
value: 12
|
132 |
-
decoder_ffn_dim:
|
133 |
-
value: 3072
|
134 |
-
decoder_layerdrop:
|
135 |
-
value: 0
|
136 |
-
decoder_layers:
|
137 |
-
value: 12
|
138 |
-
decoder_start_token_id:
|
139 |
-
value: 50258
|
140 |
-
deepspeed:
|
141 |
-
value: null
|
142 |
-
disable_tqdm:
|
143 |
-
value: false
|
144 |
-
dispatch_batches:
|
145 |
-
value: null
|
146 |
-
diversity_penalty:
|
147 |
-
value: 0
|
148 |
-
do_eval:
|
149 |
-
value: true
|
150 |
-
do_predict:
|
151 |
-
value: false
|
152 |
-
do_sample:
|
153 |
-
value: false
|
154 |
-
do_train:
|
155 |
-
value: true
|
156 |
-
dropout:
|
157 |
-
value: 0
|
158 |
-
early_stopping:
|
159 |
-
value: false
|
160 |
-
encoder_attention_heads:
|
161 |
-
value: 12
|
162 |
-
encoder_ffn_dim:
|
163 |
-
value: 3072
|
164 |
-
encoder_layerdrop:
|
165 |
-
value: 0
|
166 |
-
encoder_layers:
|
167 |
-
value: 12
|
168 |
-
encoder_no_repeat_ngram_size:
|
169 |
-
value: 0
|
170 |
-
eos_token_id:
|
171 |
-
value: 50257
|
172 |
-
eval_accumulation_steps:
|
173 |
-
value: null
|
174 |
-
eval_delay:
|
175 |
-
value: 0
|
176 |
-
eval_do_concat_batches:
|
177 |
-
value: true
|
178 |
-
eval_on_start:
|
179 |
-
value: false
|
180 |
-
eval_steps:
|
181 |
-
value: 1000
|
182 |
-
eval_strategy:
|
183 |
-
value: steps
|
184 |
-
eval_use_gather_object:
|
185 |
-
value: false
|
186 |
-
evaluation_strategy:
|
187 |
-
value: steps
|
188 |
-
exponential_decay_length_penalty:
|
189 |
-
value: null
|
190 |
-
finetuning_task:
|
191 |
-
value: null
|
192 |
-
forced_bos_token_id:
|
193 |
-
value: null
|
194 |
-
forced_decoder_ids:
|
195 |
-
value: null
|
196 |
-
forced_eos_token_id:
|
197 |
-
value: null
|
198 |
-
fp16:
|
199 |
-
value: true
|
200 |
-
fp16_backend:
|
201 |
-
value: auto
|
202 |
-
fp16_full_eval:
|
203 |
-
value: false
|
204 |
-
fp16_opt_level:
|
205 |
-
value: O1
|
206 |
-
fsdp:
|
207 |
-
value: []
|
208 |
-
fsdp_config:
|
209 |
-
value:
|
210 |
-
min_num_params: 0
|
211 |
-
xla: false
|
212 |
-
xla_fsdp_grad_ckpt: false
|
213 |
-
xla_fsdp_v2: false
|
214 |
-
fsdp_min_num_params:
|
215 |
-
value: 0
|
216 |
-
fsdp_transformer_layer_cls_to_wrap:
|
217 |
-
value: null
|
218 |
-
full_determinism:
|
219 |
-
value: false
|
220 |
-
generation_config:
|
221 |
-
value: null
|
222 |
-
generation_max_length:
|
223 |
-
value: 225
|
224 |
-
generation_num_beams:
|
225 |
-
value: null
|
226 |
-
gradient_accumulation_steps:
|
227 |
-
value: 1
|
228 |
-
gradient_checkpointing:
|
229 |
-
value: true
|
230 |
-
gradient_checkpointing_kwargs:
|
231 |
-
value: null
|
232 |
-
greater_is_better:
|
233 |
-
value: false
|
234 |
-
group_by_length:
|
235 |
-
value: false
|
236 |
-
half_precision_backend:
|
237 |
-
value: auto
|
238 |
-
hub_always_push:
|
239 |
-
value: false
|
240 |
-
hub_model_id:
|
241 |
-
value: null
|
242 |
-
hub_private_repo:
|
243 |
-
value: null
|
244 |
-
hub_strategy:
|
245 |
-
value: every_save
|
246 |
-
hub_token:
|
247 |
-
value: <HUB_TOKEN>
|
248 |
-
id2label:
|
249 |
-
value:
|
250 |
-
"0": LABEL_0
|
251 |
-
"1": LABEL_1
|
252 |
-
ignore_data_skip:
|
253 |
-
value: false
|
254 |
-
include_for_metrics:
|
255 |
-
value: []
|
256 |
-
include_inputs_for_metrics:
|
257 |
-
value: false
|
258 |
-
include_num_input_tokens_seen:
|
259 |
-
value: false
|
260 |
-
include_tokens_per_second:
|
261 |
-
value: false
|
262 |
-
init_std:
|
263 |
-
value: 0.02
|
264 |
-
is_decoder:
|
265 |
-
value: false
|
266 |
-
is_encoder_decoder:
|
267 |
-
value: true
|
268 |
-
jit_mode_eval:
|
269 |
-
value: false
|
270 |
-
label_names:
|
271 |
-
value: null
|
272 |
-
label_smoothing_factor:
|
273 |
-
value: 0
|
274 |
-
label2id:
|
275 |
-
value:
|
276 |
-
LABEL_0: 0
|
277 |
-
LABEL_1: 1
|
278 |
-
learning_rate:
|
279 |
-
value: 1e-05
|
280 |
-
length_column_name:
|
281 |
-
value: input_length
|
282 |
-
length_penalty:
|
283 |
-
value: 1
|
284 |
-
load_best_model_at_end:
|
285 |
-
value: true
|
286 |
-
local_rank:
|
287 |
-
value: 0
|
288 |
-
log_level:
|
289 |
-
value: passive
|
290 |
-
log_level_replica:
|
291 |
-
value: warning
|
292 |
-
log_on_each_node:
|
293 |
-
value: true
|
294 |
-
logging_dir:
|
295 |
-
value: ./runs/Feb12_12-51-48_tknika
|
296 |
-
logging_first_step:
|
297 |
-
value: false
|
298 |
-
logging_nan_inf_filter:
|
299 |
-
value: true
|
300 |
-
logging_steps:
|
301 |
-
value: 25
|
302 |
-
logging_strategy:
|
303 |
-
value: steps
|
304 |
-
lr_scheduler_type:
|
305 |
-
value: linear
|
306 |
-
mask_feature_length:
|
307 |
-
value: 10
|
308 |
-
mask_feature_min_masks:
|
309 |
-
value: 0
|
310 |
-
mask_feature_prob:
|
311 |
-
value: 0
|
312 |
-
mask_time_length:
|
313 |
-
value: 10
|
314 |
-
mask_time_min_masks:
|
315 |
-
value: 2
|
316 |
-
mask_time_prob:
|
317 |
-
value: 0.05
|
318 |
-
max_grad_norm:
|
319 |
-
value: 1
|
320 |
-
max_length:
|
321 |
-
value: 448
|
322 |
-
max_source_positions:
|
323 |
-
value: 1500
|
324 |
-
max_steps:
|
325 |
-
value: 8000
|
326 |
-
max_target_positions:
|
327 |
-
value: 448
|
328 |
-
median_filter_width:
|
329 |
-
value: 7
|
330 |
-
metric_for_best_model:
|
331 |
-
value: wer
|
332 |
-
min_length:
|
333 |
-
value: 0
|
334 |
-
model/num_parameters:
|
335 |
-
value: 241734912
|
336 |
-
model_type:
|
337 |
-
value: whisper
|
338 |
-
mp_parameters:
|
339 |
-
value: ""
|
340 |
-
neftune_noise_alpha:
|
341 |
-
value: null
|
342 |
-
no_cuda:
|
343 |
-
value: false
|
344 |
-
no_repeat_ngram_size:
|
345 |
-
value: 0
|
346 |
-
num_beam_groups:
|
347 |
-
value: 1
|
348 |
-
num_beams:
|
349 |
-
value: 1
|
350 |
-
num_hidden_layers:
|
351 |
-
value: 12
|
352 |
-
num_mel_bins:
|
353 |
-
value: 80
|
354 |
-
num_return_sequences:
|
355 |
-
value: 1
|
356 |
-
num_train_epochs:
|
357 |
-
value: 3
|
358 |
-
optim:
|
359 |
-
value: adamw_torch
|
360 |
-
optim_args:
|
361 |
-
value: null
|
362 |
-
optim_target_modules:
|
363 |
-
value: null
|
364 |
-
output_attentions:
|
365 |
-
value: false
|
366 |
-
output_dir:
|
367 |
-
value: ./
|
368 |
-
output_hidden_states:
|
369 |
-
value: false
|
370 |
-
output_scores:
|
371 |
-
value: false
|
372 |
-
overwrite_output_dir:
|
373 |
-
value: true
|
374 |
-
pad_token_id:
|
375 |
-
value: 50257
|
376 |
-
past_index:
|
377 |
-
value: -1
|
378 |
-
per_device_eval_batch_size:
|
379 |
-
value: 16
|
380 |
-
per_device_train_batch_size:
|
381 |
-
value: 32
|
382 |
-
per_gpu_eval_batch_size:
|
383 |
-
value: null
|
384 |
-
per_gpu_train_batch_size:
|
385 |
-
value: null
|
386 |
-
predict_with_generate:
|
387 |
-
value: true
|
388 |
-
prediction_loss_only:
|
389 |
-
value: false
|
390 |
-
prefix:
|
391 |
-
value: null
|
392 |
-
problem_type:
|
393 |
-
value: null
|
394 |
-
push_to_hub:
|
395 |
-
value: true
|
396 |
-
push_to_hub_model_id:
|
397 |
-
value: null
|
398 |
-
push_to_hub_organization:
|
399 |
-
value: null
|
400 |
-
push_to_hub_token:
|
401 |
-
value: <PUSH_TO_HUB_TOKEN>
|
402 |
-
ray_scope:
|
403 |
-
value: last
|
404 |
-
remove_invalid_values:
|
405 |
-
value: false
|
406 |
-
remove_unused_columns:
|
407 |
-
value: true
|
408 |
-
repetition_penalty:
|
409 |
-
value: 1
|
410 |
-
report_to:
|
411 |
-
value:
|
412 |
-
- wandb
|
413 |
-
restore_callback_states_from_checkpoint:
|
414 |
-
value: false
|
415 |
-
resume_from_checkpoint:
|
416 |
-
value: null
|
417 |
-
return_dict:
|
418 |
-
value: true
|
419 |
-
return_dict_in_generate:
|
420 |
-
value: false
|
421 |
-
run_name:
|
422 |
-
value: whisper-small-eu
|
423 |
-
save_on_each_node:
|
424 |
-
value: false
|
425 |
-
save_only_model:
|
426 |
-
value: false
|
427 |
-
save_safetensors:
|
428 |
-
value: true
|
429 |
-
save_steps:
|
430 |
-
value: 1000
|
431 |
-
save_strategy:
|
432 |
-
value: steps
|
433 |
-
save_total_limit:
|
434 |
-
value: null
|
435 |
-
scale_embedding:
|
436 |
-
value: false
|
437 |
-
seed:
|
438 |
-
value: 42
|
439 |
-
sep_token_id:
|
440 |
-
value: null
|
441 |
-
skip_memory_metrics:
|
442 |
-
value: true
|
443 |
-
sortish_sampler:
|
444 |
-
value: false
|
445 |
-
split_batches:
|
446 |
-
value: null
|
447 |
-
suppress_tokens:
|
448 |
-
value: null
|
449 |
-
task_specific_params:
|
450 |
-
value: null
|
451 |
-
temperature:
|
452 |
-
value: 1
|
453 |
-
tf_legacy_loss:
|
454 |
-
value: false
|
455 |
-
tf32:
|
456 |
-
value: null
|
457 |
-
tie_encoder_decoder:
|
458 |
-
value: false
|
459 |
-
tie_word_embeddings:
|
460 |
-
value: true
|
461 |
-
tokenizer_class:
|
462 |
-
value: null
|
463 |
-
top_k:
|
464 |
-
value: 50
|
465 |
-
top_p:
|
466 |
-
value: 1
|
467 |
-
torch_compile:
|
468 |
-
value: false
|
469 |
-
torch_compile_backend:
|
470 |
-
value: null
|
471 |
-
torch_compile_mode:
|
472 |
-
value: null
|
473 |
-
torch_dtype:
|
474 |
-
value: float32
|
475 |
-
torch_empty_cache_steps:
|
476 |
-
value: null
|
477 |
-
torchdynamo:
|
478 |
-
value: null
|
479 |
-
torchscript:
|
480 |
-
value: false
|
481 |
-
tpu_metrics_debug:
|
482 |
-
value: false
|
483 |
-
tpu_num_cores:
|
484 |
-
value: null
|
485 |
-
transformers_version:
|
486 |
-
value: 4.49.0.dev0
|
487 |
-
typical_p:
|
488 |
-
value: 1
|
489 |
-
use_bfloat16:
|
490 |
-
value: false
|
491 |
-
use_cache:
|
492 |
-
value: false
|
493 |
-
use_cpu:
|
494 |
-
value: false
|
495 |
-
use_ipex:
|
496 |
-
value: false
|
497 |
-
use_legacy_prediction_loop:
|
498 |
-
value: false
|
499 |
-
use_liger_kernel:
|
500 |
-
value: false
|
501 |
-
use_mps_device:
|
502 |
-
value: false
|
503 |
-
use_weighted_layer_sum:
|
504 |
-
value: false
|
505 |
-
vocab_size:
|
506 |
-
value: 51865
|
507 |
-
warmup_ratio:
|
508 |
-
value: 0
|
509 |
-
warmup_steps:
|
510 |
-
value: 500
|
511 |
-
weight_decay:
|
512 |
-
value: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/files/output.log
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
3 |
-
main()
|
4 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
5 |
-
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
-
return inner_training_loop(
|
9 |
-
^^^^^^^^^^^^^^^^^^^^
|
10 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
-
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
-
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
-
result = getattr(callback, event)(
|
18 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
20 |
-
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
-
AttributeError: 'NoneType' object has no attribute 'dataset'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
aiosignal==1.3.2
|
2 |
-
Markdown==3.7
|
3 |
-
more-itertools==10.6.0
|
4 |
-
requests==2.32.3
|
5 |
-
sentry-sdk==2.21.0
|
6 |
-
torchaudio==2.6.0
|
7 |
-
charset-normalizer==3.4.1
|
8 |
-
docker-pycreds==0.4.0
|
9 |
-
nvidia-cusolver-cu12==11.6.1.9
|
10 |
-
PyYAML==6.0.2
|
11 |
-
librosa==0.10.2.post1
|
12 |
-
soxr==0.5.0.post1
|
13 |
-
multiprocess==0.70.16
|
14 |
-
setuptools==75.8.0
|
15 |
-
nvidia-cufft-cu12==11.2.1.3
|
16 |
-
joblib==1.4.2
|
17 |
-
pytz==2025.1
|
18 |
-
pip==24.0
|
19 |
-
scikit-learn==1.6.1
|
20 |
-
certifi==2025.1.31
|
21 |
-
jiwer==3.1.0
|
22 |
-
regex==2024.11.6
|
23 |
-
annotated-types==0.7.0
|
24 |
-
grpcio==1.70.0
|
25 |
-
msgpack==1.1.0
|
26 |
-
mpmath==1.3.0
|
27 |
-
nvidia-cudnn-cu12==9.1.0.70
|
28 |
-
soundfile==0.13.1
|
29 |
-
dill==0.3.8
|
30 |
-
nvidia-nvtx-cu12==12.4.127
|
31 |
-
six==1.17.0
|
32 |
-
nvidia-cuda-cupti-cu12==12.4.127
|
33 |
-
pyarrow==19.0.0
|
34 |
-
nvidia-nccl-cu12==2.21.5
|
35 |
-
psutil==6.1.1
|
36 |
-
decorator==5.1.1
|
37 |
-
llvmlite==0.44.0
|
38 |
-
frozenlist==1.5.0
|
39 |
-
pydantic==2.10.6
|
40 |
-
networkx==3.4.2
|
41 |
-
idna==3.10
|
42 |
-
wandb==0.19.6
|
43 |
-
aiohttp==3.11.12
|
44 |
-
RapidFuzz==3.12.1
|
45 |
-
pandas==2.2.3
|
46 |
-
python-dateutil==2.9.0.post0
|
47 |
-
numpy==2.1.3
|
48 |
-
tokenizers==0.21.0
|
49 |
-
nvidia-cusparselt-cu12==0.6.2
|
50 |
-
typing_extensions==4.12.2
|
51 |
-
urllib3==2.3.0
|
52 |
-
setproctitle==1.3.4
|
53 |
-
tzdata==2025.1
|
54 |
-
sympy==1.13.1
|
55 |
-
pooch==1.8.2
|
56 |
-
click==8.1.8
|
57 |
-
pydantic_core==2.27.2
|
58 |
-
MarkupSafe==3.0.2
|
59 |
-
scipy==1.15.1
|
60 |
-
accelerate==1.3.0
|
61 |
-
tensorboard==2.19.0
|
62 |
-
protobuf==5.29.3
|
63 |
-
gitdb==4.0.12
|
64 |
-
smmap==5.0.2
|
65 |
-
absl-py==2.1.0
|
66 |
-
tqdm==4.67.1
|
67 |
-
yarl==1.18.3
|
68 |
-
pycparser==2.22
|
69 |
-
nvidia-cusparse-cu12==12.3.1.170
|
70 |
-
attrs==25.1.0
|
71 |
-
lazy_loader==0.4
|
72 |
-
tensorboard-data-server==0.7.2
|
73 |
-
threadpoolctl==3.5.0
|
74 |
-
GitPython==3.1.44
|
75 |
-
safetensors==0.5.2
|
76 |
-
fsspec==2024.12.0
|
77 |
-
nvidia-cuda-nvrtc-cu12==12.4.127
|
78 |
-
filelock==3.17.0
|
79 |
-
aiohappyeyeballs==2.4.6
|
80 |
-
packaging==24.2
|
81 |
-
datasets==3.2.1.dev0
|
82 |
-
audioread==3.0.1
|
83 |
-
propcache==0.2.1
|
84 |
-
transformers==4.49.0.dev0
|
85 |
-
nvidia-cuda-runtime-cu12==12.4.127
|
86 |
-
cffi==1.17.1
|
87 |
-
evaluate==0.4.3
|
88 |
-
Werkzeug==3.1.3
|
89 |
-
huggingface-hub==0.28.1
|
90 |
-
Jinja2==3.1.5
|
91 |
-
torch==2.6.0
|
92 |
-
nvidia-curand-cu12==10.3.5.147
|
93 |
-
xxhash==3.5.0
|
94 |
-
platformdirs==4.3.6
|
95 |
-
multidict==6.1.0
|
96 |
-
nvidia-cublas-cu12==12.4.5.8
|
97 |
-
nvidia-nvjitlink-cu12==12.4.127
|
98 |
-
triton==3.2.0
|
99 |
-
numba==0.61.0
|
100 |
-
importlib_metadata==8.0.0
|
101 |
-
platformdirs==4.2.2
|
102 |
-
typeguard==4.3.0
|
103 |
-
more-itertools==10.3.0
|
104 |
-
tomli==2.0.1
|
105 |
-
autocommand==2.2.2
|
106 |
-
zipp==3.19.2
|
107 |
-
typing_extensions==4.12.2
|
108 |
-
backports.tarfile==1.2.0
|
109 |
-
inflect==7.3.1
|
110 |
-
jaraco.text==3.12.1
|
111 |
-
wheel==0.43.0
|
112 |
-
packaging==24.2
|
113 |
-
jaraco.collections==5.1.0
|
114 |
-
jaraco.functools==4.0.1
|
115 |
-
jaraco.context==5.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
3 |
-
"python": "CPython 3.12.3",
|
4 |
-
"startedAt": "2025-02-12T12:52:03.105234Z",
|
5 |
-
"args": [
|
6 |
-
"--model_name_or_path=openai/whisper-small",
|
7 |
-
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
8 |
-
"--language=basque",
|
9 |
-
"--train_split_name=train",
|
10 |
-
"--eval_split_name=test_parl",
|
11 |
-
"--model_index_name=Whisper Small Basque",
|
12 |
-
"--max_steps=8000",
|
13 |
-
"--output_dir=./",
|
14 |
-
"--per_device_train_batch_size=32",
|
15 |
-
"--per_device_eval_batch_size=16",
|
16 |
-
"--gradient_accumulation_steps=1",
|
17 |
-
"--logging_steps=25",
|
18 |
-
"--learning_rate=1e-5",
|
19 |
-
"--warmup_steps=500",
|
20 |
-
"--evaluation_strategy=steps",
|
21 |
-
"--eval_steps=1000",
|
22 |
-
"--save_strategy=steps",
|
23 |
-
"--save_steps=1000",
|
24 |
-
"--generation_max_length=225",
|
25 |
-
"--length_column_name=input_length",
|
26 |
-
"--max_duration_in_seconds=30",
|
27 |
-
"--text_column_name=sentence",
|
28 |
-
"--freeze_feature_encoder=False",
|
29 |
-
"--report_to=tensorboard",
|
30 |
-
"--metric_for_best_model=wer",
|
31 |
-
"--greater_is_better=False",
|
32 |
-
"--load_best_model_at_end",
|
33 |
-
"--gradient_checkpointing",
|
34 |
-
"--fp16",
|
35 |
-
"--overwrite_output_dir",
|
36 |
-
"--do_train",
|
37 |
-
"--do_eval",
|
38 |
-
"--predict_with_generate",
|
39 |
-
"--do_normalize_eval",
|
40 |
-
"--streaming",
|
41 |
-
"--use_auth_token",
|
42 |
-
"--push_to_hub",
|
43 |
-
"--report_to",
|
44 |
-
"wandb",
|
45 |
-
"--run_name",
|
46 |
-
"whisper-small-eu"
|
47 |
-
],
|
48 |
-
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
49 |
-
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
50 |
-
"git": {
|
51 |
-
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
52 |
-
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
53 |
-
},
|
54 |
-
"email": "[email protected]",
|
55 |
-
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
56 |
-
"host": "tknika",
|
57 |
-
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
58 |
-
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
59 |
-
"cpu_count": 8,
|
60 |
-
"cpu_count_logical": 8,
|
61 |
-
"gpu": "NVIDIA L40-48Q",
|
62 |
-
"gpu_count": 1,
|
63 |
-
"disk": {
|
64 |
-
"/": {
|
65 |
-
"total": "525987168256",
|
66 |
-
"used": "313777016832"
|
67 |
-
}
|
68 |
-
},
|
69 |
-
"memory": {
|
70 |
-
"total": "33654022144"
|
71 |
-
},
|
72 |
-
"cpu": {
|
73 |
-
"count": 8,
|
74 |
-
"countLogical": 8
|
75 |
-
},
|
76 |
-
"gpu_nvidia": [
|
77 |
-
{
|
78 |
-
"name": "NVIDIA L40-48Q",
|
79 |
-
"memoryTotal": "51539607552",
|
80 |
-
"cudaCores": 18176,
|
81 |
-
"architecture": "Ada"
|
82 |
-
}
|
83 |
-
],
|
84 |
-
"cudaVersion": "12.4"
|
85 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"_wandb":{"runtime":0}}
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
2 |
-
{"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112}
|
3 |
-
{"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}}
|
4 |
-
{"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"}
|
5 |
-
{"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
|
6 |
-
{"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
|
7 |
-
{"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"}
|
8 |
-
{"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"}
|
9 |
-
{"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"}
|
10 |
-
{"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"}
|
11 |
-
{"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"}
|
12 |
-
{"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"}
|
13 |
-
{"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"}
|
14 |
-
{"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"}
|
2 |
-
{"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"}
|
3 |
-
{"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"}
|
4 |
-
{"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"}
|
5 |
-
{"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"}
|
6 |
-
{"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"}
|
7 |
-
{"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"}
|
8 |
-
{"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"}
|
9 |
-
{"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"}
|
10 |
-
{"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"}
|
11 |
-
{"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
12 |
-
{"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"}
|
13 |
-
{"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"}
|
14 |
-
{"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"}
|
15 |
-
{"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
2025-02-12 12:52:02,886 INFO MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
2 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112
|
3 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
4 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
5 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
6 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
|
7 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
|
8 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():756] calling init triggers
|
9 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
10 |
-
config: {'_wandb': {}}
|
11 |
-
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():789] starting backend
|
12 |
-
2025-02-12 12:52:03,097 INFO MainThread:226112 [wandb_init.py:init():793] sending inform_init request
|
13 |
-
2025-02-12 12:52:03,104 INFO MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
14 |
-
2025-02-12 12:52:03,104 INFO MainThread:226112 [wandb_init.py:init():808] backend started and connected
|
15 |
-
2025-02-12 12:52:03,107 INFO MainThread:226112 [wandb_init.py:init():901] updated telemetry
|
16 |
-
2025-02-12 12:52:03,114 INFO MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
17 |
-
2025-02-12 12:52:03,483 INFO MainThread:226112 [wandb_init.py:init():994] starting run threads in backend
|
18 |
-
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg
|
19 |
-
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
20 |
-
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
21 |
-
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed.
|
22 |
-
2025-02-12 12:52:03,568 INFO MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process
|
23 |
-
2025-02-12 12:52:03,569 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
24 |
-
2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7d4830f2ddf0>>
|
25 |
-
2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
26 |
-
2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb
DELETED
Binary file (11.3 kB)
|
|
wandb/run-20250212_125924-xhsgsxqq/files/config.yaml
DELETED
@@ -1,512 +0,0 @@
|
|
1 |
-
_attn_implementation_autoset:
|
2 |
-
value: true
|
3 |
-
_name_or_path:
|
4 |
-
value: openai/whisper-small
|
5 |
-
_wandb:
|
6 |
-
value:
|
7 |
-
cli_version: 0.19.6
|
8 |
-
m:
|
9 |
-
- "1": train/global_step
|
10 |
-
"6":
|
11 |
-
- 3
|
12 |
-
"7": []
|
13 |
-
python_version: 3.12.3
|
14 |
-
t:
|
15 |
-
"1":
|
16 |
-
- 1
|
17 |
-
- 5
|
18 |
-
- 11
|
19 |
-
- 49
|
20 |
-
- 51
|
21 |
-
- 53
|
22 |
-
- 55
|
23 |
-
- 71
|
24 |
-
- 100
|
25 |
-
"2":
|
26 |
-
- 1
|
27 |
-
- 5
|
28 |
-
- 11
|
29 |
-
- 49
|
30 |
-
- 51
|
31 |
-
- 53
|
32 |
-
- 55
|
33 |
-
- 71
|
34 |
-
- 100
|
35 |
-
"3":
|
36 |
-
- 7
|
37 |
-
- 13
|
38 |
-
- 19
|
39 |
-
- 23
|
40 |
-
- 55
|
41 |
-
- 66
|
42 |
-
"4": 3.12.3
|
43 |
-
"5": 0.19.6
|
44 |
-
"6": 4.49.0.dev0
|
45 |
-
"8":
|
46 |
-
- 5
|
47 |
-
"9":
|
48 |
-
"1": transformers_trainer
|
49 |
-
"12": 0.19.6
|
50 |
-
"13": linux-x86_64
|
51 |
-
accelerator_config:
|
52 |
-
value:
|
53 |
-
dispatch_batches: null
|
54 |
-
even_batches: true
|
55 |
-
gradient_accumulation_kwargs: null
|
56 |
-
non_blocking: false
|
57 |
-
split_batches: false
|
58 |
-
use_seedable_sampler: true
|
59 |
-
activation_dropout:
|
60 |
-
value: 0
|
61 |
-
activation_function:
|
62 |
-
value: gelu
|
63 |
-
adafactor:
|
64 |
-
value: false
|
65 |
-
adam_beta1:
|
66 |
-
value: 0.9
|
67 |
-
adam_beta2:
|
68 |
-
value: 0.999
|
69 |
-
adam_epsilon:
|
70 |
-
value: 1e-08
|
71 |
-
add_cross_attention:
|
72 |
-
value: false
|
73 |
-
apply_spec_augment:
|
74 |
-
value: false
|
75 |
-
architectures:
|
76 |
-
value:
|
77 |
-
- WhisperForConditionalGeneration
|
78 |
-
attention_dropout:
|
79 |
-
value: 0
|
80 |
-
auto_find_batch_size:
|
81 |
-
value: false
|
82 |
-
average_tokens_across_devices:
|
83 |
-
value: false
|
84 |
-
bad_words_ids:
|
85 |
-
value: null
|
86 |
-
batch_eval_metrics:
|
87 |
-
value: false
|
88 |
-
begin_suppress_tokens:
|
89 |
-
value:
|
90 |
-
- 220
|
91 |
-
- 50257
|
92 |
-
bf16:
|
93 |
-
value: false
|
94 |
-
bf16_full_eval:
|
95 |
-
value: false
|
96 |
-
bos_token_id:
|
97 |
-
value: 50257
|
98 |
-
chunk_size_feed_forward:
|
99 |
-
value: 0
|
100 |
-
classifier_proj_size:
|
101 |
-
value: 256
|
102 |
-
cross_attention_hidden_size:
|
103 |
-
value: null
|
104 |
-
d_model:
|
105 |
-
value: 768
|
106 |
-
data_seed:
|
107 |
-
value: null
|
108 |
-
dataloader_drop_last:
|
109 |
-
value: false
|
110 |
-
dataloader_num_workers:
|
111 |
-
value: 0
|
112 |
-
dataloader_persistent_workers:
|
113 |
-
value: false
|
114 |
-
dataloader_pin_memory:
|
115 |
-
value: true
|
116 |
-
dataloader_prefetch_factor:
|
117 |
-
value: null
|
118 |
-
ddp_backend:
|
119 |
-
value: null
|
120 |
-
ddp_broadcast_buffers:
|
121 |
-
value: null
|
122 |
-
ddp_bucket_cap_mb:
|
123 |
-
value: null
|
124 |
-
ddp_find_unused_parameters:
|
125 |
-
value: null
|
126 |
-
ddp_timeout:
|
127 |
-
value: 1800
|
128 |
-
debug:
|
129 |
-
value: []
|
130 |
-
decoder_attention_heads:
|
131 |
-
value: 12
|
132 |
-
decoder_ffn_dim:
|
133 |
-
value: 3072
|
134 |
-
decoder_layerdrop:
|
135 |
-
value: 0
|
136 |
-
decoder_layers:
|
137 |
-
value: 12
|
138 |
-
decoder_start_token_id:
|
139 |
-
value: 50258
|
140 |
-
deepspeed:
|
141 |
-
value: null
|
142 |
-
disable_tqdm:
|
143 |
-
value: false
|
144 |
-
dispatch_batches:
|
145 |
-
value: null
|
146 |
-
diversity_penalty:
|
147 |
-
value: 0
|
148 |
-
do_eval:
|
149 |
-
value: true
|
150 |
-
do_predict:
|
151 |
-
value: false
|
152 |
-
do_sample:
|
153 |
-
value: false
|
154 |
-
do_train:
|
155 |
-
value: true
|
156 |
-
dropout:
|
157 |
-
value: 0
|
158 |
-
early_stopping:
|
159 |
-
value: false
|
160 |
-
encoder_attention_heads:
|
161 |
-
value: 12
|
162 |
-
encoder_ffn_dim:
|
163 |
-
value: 3072
|
164 |
-
encoder_layerdrop:
|
165 |
-
value: 0
|
166 |
-
encoder_layers:
|
167 |
-
value: 12
|
168 |
-
encoder_no_repeat_ngram_size:
|
169 |
-
value: 0
|
170 |
-
eos_token_id:
|
171 |
-
value: 50257
|
172 |
-
eval_accumulation_steps:
|
173 |
-
value: null
|
174 |
-
eval_delay:
|
175 |
-
value: 0
|
176 |
-
eval_do_concat_batches:
|
177 |
-
value: true
|
178 |
-
eval_on_start:
|
179 |
-
value: false
|
180 |
-
eval_steps:
|
181 |
-
value: 1000
|
182 |
-
eval_strategy:
|
183 |
-
value: steps
|
184 |
-
eval_use_gather_object:
|
185 |
-
value: false
|
186 |
-
evaluation_strategy:
|
187 |
-
value: steps
|
188 |
-
exponential_decay_length_penalty:
|
189 |
-
value: null
|
190 |
-
finetuning_task:
|
191 |
-
value: null
|
192 |
-
forced_bos_token_id:
|
193 |
-
value: null
|
194 |
-
forced_decoder_ids:
|
195 |
-
value: null
|
196 |
-
forced_eos_token_id:
|
197 |
-
value: null
|
198 |
-
fp16:
|
199 |
-
value: true
|
200 |
-
fp16_backend:
|
201 |
-
value: auto
|
202 |
-
fp16_full_eval:
|
203 |
-
value: false
|
204 |
-
fp16_opt_level:
|
205 |
-
value: O1
|
206 |
-
fsdp:
|
207 |
-
value: []
|
208 |
-
fsdp_config:
|
209 |
-
value:
|
210 |
-
min_num_params: 0
|
211 |
-
xla: false
|
212 |
-
xla_fsdp_grad_ckpt: false
|
213 |
-
xla_fsdp_v2: false
|
214 |
-
fsdp_min_num_params:
|
215 |
-
value: 0
|
216 |
-
fsdp_transformer_layer_cls_to_wrap:
|
217 |
-
value: null
|
218 |
-
full_determinism:
|
219 |
-
value: false
|
220 |
-
generation_config:
|
221 |
-
value: null
|
222 |
-
generation_max_length:
|
223 |
-
value: 225
|
224 |
-
generation_num_beams:
|
225 |
-
value: null
|
226 |
-
gradient_accumulation_steps:
|
227 |
-
value: 1
|
228 |
-
gradient_checkpointing:
|
229 |
-
value: true
|
230 |
-
gradient_checkpointing_kwargs:
|
231 |
-
value: null
|
232 |
-
greater_is_better:
|
233 |
-
value: false
|
234 |
-
group_by_length:
|
235 |
-
value: false
|
236 |
-
half_precision_backend:
|
237 |
-
value: auto
|
238 |
-
hub_always_push:
|
239 |
-
value: false
|
240 |
-
hub_model_id:
|
241 |
-
value: null
|
242 |
-
hub_private_repo:
|
243 |
-
value: null
|
244 |
-
hub_strategy:
|
245 |
-
value: every_save
|
246 |
-
hub_token:
|
247 |
-
value: <HUB_TOKEN>
|
248 |
-
id2label:
|
249 |
-
value:
|
250 |
-
"0": LABEL_0
|
251 |
-
"1": LABEL_1
|
252 |
-
ignore_data_skip:
|
253 |
-
value: false
|
254 |
-
include_for_metrics:
|
255 |
-
value: []
|
256 |
-
include_inputs_for_metrics:
|
257 |
-
value: false
|
258 |
-
include_num_input_tokens_seen:
|
259 |
-
value: false
|
260 |
-
include_tokens_per_second:
|
261 |
-
value: false
|
262 |
-
init_std:
|
263 |
-
value: 0.02
|
264 |
-
is_decoder:
|
265 |
-
value: false
|
266 |
-
is_encoder_decoder:
|
267 |
-
value: true
|
268 |
-
jit_mode_eval:
|
269 |
-
value: false
|
270 |
-
label_names:
|
271 |
-
value: null
|
272 |
-
label_smoothing_factor:
|
273 |
-
value: 0
|
274 |
-
label2id:
|
275 |
-
value:
|
276 |
-
LABEL_0: 0
|
277 |
-
LABEL_1: 1
|
278 |
-
learning_rate:
|
279 |
-
value: 1e-05
|
280 |
-
length_column_name:
|
281 |
-
value: input_length
|
282 |
-
length_penalty:
|
283 |
-
value: 1
|
284 |
-
load_best_model_at_end:
|
285 |
-
value: true
|
286 |
-
local_rank:
|
287 |
-
value: 0
|
288 |
-
log_level:
|
289 |
-
value: passive
|
290 |
-
log_level_replica:
|
291 |
-
value: warning
|
292 |
-
log_on_each_node:
|
293 |
-
value: true
|
294 |
-
logging_dir:
|
295 |
-
value: ./runs/Feb12_12-58-59_tknika
|
296 |
-
logging_first_step:
|
297 |
-
value: false
|
298 |
-
logging_nan_inf_filter:
|
299 |
-
value: true
|
300 |
-
logging_steps:
|
301 |
-
value: 25
|
302 |
-
logging_strategy:
|
303 |
-
value: steps
|
304 |
-
lr_scheduler_type:
|
305 |
-
value: linear
|
306 |
-
mask_feature_length:
|
307 |
-
value: 10
|
308 |
-
mask_feature_min_masks:
|
309 |
-
value: 0
|
310 |
-
mask_feature_prob:
|
311 |
-
value: 0
|
312 |
-
mask_time_length:
|
313 |
-
value: 10
|
314 |
-
mask_time_min_masks:
|
315 |
-
value: 2
|
316 |
-
mask_time_prob:
|
317 |
-
value: 0.05
|
318 |
-
max_grad_norm:
|
319 |
-
value: 1
|
320 |
-
max_length:
|
321 |
-
value: 448
|
322 |
-
max_source_positions:
|
323 |
-
value: 1500
|
324 |
-
max_steps:
|
325 |
-
value: 8000
|
326 |
-
max_target_positions:
|
327 |
-
value: 448
|
328 |
-
median_filter_width:
|
329 |
-
value: 7
|
330 |
-
metric_for_best_model:
|
331 |
-
value: wer
|
332 |
-
min_length:
|
333 |
-
value: 0
|
334 |
-
model/num_parameters:
|
335 |
-
value: 241734912
|
336 |
-
model_type:
|
337 |
-
value: whisper
|
338 |
-
mp_parameters:
|
339 |
-
value: ""
|
340 |
-
neftune_noise_alpha:
|
341 |
-
value: null
|
342 |
-
no_cuda:
|
343 |
-
value: false
|
344 |
-
no_repeat_ngram_size:
|
345 |
-
value: 0
|
346 |
-
num_beam_groups:
|
347 |
-
value: 1
|
348 |
-
num_beams:
|
349 |
-
value: 1
|
350 |
-
num_hidden_layers:
|
351 |
-
value: 12
|
352 |
-
num_mel_bins:
|
353 |
-
value: 80
|
354 |
-
num_return_sequences:
|
355 |
-
value: 1
|
356 |
-
num_train_epochs:
|
357 |
-
value: 3
|
358 |
-
optim:
|
359 |
-
value: adamw_torch
|
360 |
-
optim_args:
|
361 |
-
value: null
|
362 |
-
optim_target_modules:
|
363 |
-
value: null
|
364 |
-
output_attentions:
|
365 |
-
value: false
|
366 |
-
output_dir:
|
367 |
-
value: ./
|
368 |
-
output_hidden_states:
|
369 |
-
value: false
|
370 |
-
output_scores:
|
371 |
-
value: false
|
372 |
-
overwrite_output_dir:
|
373 |
-
value: true
|
374 |
-
pad_token_id:
|
375 |
-
value: 50257
|
376 |
-
past_index:
|
377 |
-
value: -1
|
378 |
-
per_device_eval_batch_size:
|
379 |
-
value: 16
|
380 |
-
per_device_train_batch_size:
|
381 |
-
value: 32
|
382 |
-
per_gpu_eval_batch_size:
|
383 |
-
value: null
|
384 |
-
per_gpu_train_batch_size:
|
385 |
-
value: null
|
386 |
-
predict_with_generate:
|
387 |
-
value: true
|
388 |
-
prediction_loss_only:
|
389 |
-
value: false
|
390 |
-
prefix:
|
391 |
-
value: null
|
392 |
-
problem_type:
|
393 |
-
value: null
|
394 |
-
push_to_hub:
|
395 |
-
value: true
|
396 |
-
push_to_hub_model_id:
|
397 |
-
value: null
|
398 |
-
push_to_hub_organization:
|
399 |
-
value: null
|
400 |
-
push_to_hub_token:
|
401 |
-
value: <PUSH_TO_HUB_TOKEN>
|
402 |
-
ray_scope:
|
403 |
-
value: last
|
404 |
-
remove_invalid_values:
|
405 |
-
value: false
|
406 |
-
remove_unused_columns:
|
407 |
-
value: true
|
408 |
-
repetition_penalty:
|
409 |
-
value: 1
|
410 |
-
report_to:
|
411 |
-
value:
|
412 |
-
- wandb
|
413 |
-
restore_callback_states_from_checkpoint:
|
414 |
-
value: false
|
415 |
-
resume_from_checkpoint:
|
416 |
-
value: null
|
417 |
-
return_dict:
|
418 |
-
value: true
|
419 |
-
return_dict_in_generate:
|
420 |
-
value: false
|
421 |
-
run_name:
|
422 |
-
value: whisper-small-eu
|
423 |
-
save_on_each_node:
|
424 |
-
value: false
|
425 |
-
save_only_model:
|
426 |
-
value: false
|
427 |
-
save_safetensors:
|
428 |
-
value: true
|
429 |
-
save_steps:
|
430 |
-
value: 1000
|
431 |
-
save_strategy:
|
432 |
-
value: steps
|
433 |
-
save_total_limit:
|
434 |
-
value: null
|
435 |
-
scale_embedding:
|
436 |
-
value: false
|
437 |
-
seed:
|
438 |
-
value: 42
|
439 |
-
sep_token_id:
|
440 |
-
value: null
|
441 |
-
skip_memory_metrics:
|
442 |
-
value: true
|
443 |
-
sortish_sampler:
|
444 |
-
value: false
|
445 |
-
split_batches:
|
446 |
-
value: null
|
447 |
-
suppress_tokens:
|
448 |
-
value: null
|
449 |
-
task_specific_params:
|
450 |
-
value: null
|
451 |
-
temperature:
|
452 |
-
value: 1
|
453 |
-
tf_legacy_loss:
|
454 |
-
value: false
|
455 |
-
tf32:
|
456 |
-
value: null
|
457 |
-
tie_encoder_decoder:
|
458 |
-
value: false
|
459 |
-
tie_word_embeddings:
|
460 |
-
value: true
|
461 |
-
tokenizer_class:
|
462 |
-
value: null
|
463 |
-
top_k:
|
464 |
-
value: 50
|
465 |
-
top_p:
|
466 |
-
value: 1
|
467 |
-
torch_compile:
|
468 |
-
value: false
|
469 |
-
torch_compile_backend:
|
470 |
-
value: null
|
471 |
-
torch_compile_mode:
|
472 |
-
value: null
|
473 |
-
torch_dtype:
|
474 |
-
value: float32
|
475 |
-
torch_empty_cache_steps:
|
476 |
-
value: null
|
477 |
-
torchdynamo:
|
478 |
-
value: null
|
479 |
-
torchscript:
|
480 |
-
value: false
|
481 |
-
tpu_metrics_debug:
|
482 |
-
value: false
|
483 |
-
tpu_num_cores:
|
484 |
-
value: null
|
485 |
-
transformers_version:
|
486 |
-
value: 4.49.0.dev0
|
487 |
-
typical_p:
|
488 |
-
value: 1
|
489 |
-
use_bfloat16:
|
490 |
-
value: false
|
491 |
-
use_cache:
|
492 |
-
value: false
|
493 |
-
use_cpu:
|
494 |
-
value: false
|
495 |
-
use_ipex:
|
496 |
-
value: false
|
497 |
-
use_legacy_prediction_loop:
|
498 |
-
value: false
|
499 |
-
use_liger_kernel:
|
500 |
-
value: false
|
501 |
-
use_mps_device:
|
502 |
-
value: false
|
503 |
-
use_weighted_layer_sum:
|
504 |
-
value: false
|
505 |
-
vocab_size:
|
506 |
-
value: 51865
|
507 |
-
warmup_ratio:
|
508 |
-
value: 0
|
509 |
-
warmup_steps:
|
510 |
-
value: 500
|
511 |
-
weight_decay:
|
512 |
-
value: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/files/output.log
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 632, in <module>
|
3 |
-
main()
|
4 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main
|
5 |
-
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
-
return inner_training_loop(
|
9 |
-
^^^^^^^^^^^^^^^^^^^^
|
10 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
-
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
-
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
-
result = getattr(callback, event)(
|
18 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
|
20 |
-
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
-
AttributeError: 'NoneType' object has no attribute 'dataset'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
aiosignal==1.3.2
|
2 |
-
Markdown==3.7
|
3 |
-
more-itertools==10.6.0
|
4 |
-
requests==2.32.3
|
5 |
-
sentry-sdk==2.21.0
|
6 |
-
torchaudio==2.6.0
|
7 |
-
charset-normalizer==3.4.1
|
8 |
-
docker-pycreds==0.4.0
|
9 |
-
nvidia-cusolver-cu12==11.6.1.9
|
10 |
-
PyYAML==6.0.2
|
11 |
-
librosa==0.10.2.post1
|
12 |
-
soxr==0.5.0.post1
|
13 |
-
multiprocess==0.70.16
|
14 |
-
setuptools==75.8.0
|
15 |
-
nvidia-cufft-cu12==11.2.1.3
|
16 |
-
joblib==1.4.2
|
17 |
-
pytz==2025.1
|
18 |
-
pip==24.0
|
19 |
-
scikit-learn==1.6.1
|
20 |
-
certifi==2025.1.31
|
21 |
-
jiwer==3.1.0
|
22 |
-
regex==2024.11.6
|
23 |
-
annotated-types==0.7.0
|
24 |
-
grpcio==1.70.0
|
25 |
-
msgpack==1.1.0
|
26 |
-
mpmath==1.3.0
|
27 |
-
nvidia-cudnn-cu12==9.1.0.70
|
28 |
-
soundfile==0.13.1
|
29 |
-
dill==0.3.8
|
30 |
-
nvidia-nvtx-cu12==12.4.127
|
31 |
-
six==1.17.0
|
32 |
-
nvidia-cuda-cupti-cu12==12.4.127
|
33 |
-
pyarrow==19.0.0
|
34 |
-
nvidia-nccl-cu12==2.21.5
|
35 |
-
psutil==6.1.1
|
36 |
-
decorator==5.1.1
|
37 |
-
llvmlite==0.44.0
|
38 |
-
frozenlist==1.5.0
|
39 |
-
pydantic==2.10.6
|
40 |
-
networkx==3.4.2
|
41 |
-
idna==3.10
|
42 |
-
wandb==0.19.6
|
43 |
-
aiohttp==3.11.12
|
44 |
-
RapidFuzz==3.12.1
|
45 |
-
pandas==2.2.3
|
46 |
-
python-dateutil==2.9.0.post0
|
47 |
-
numpy==2.1.3
|
48 |
-
tokenizers==0.21.0
|
49 |
-
nvidia-cusparselt-cu12==0.6.2
|
50 |
-
typing_extensions==4.12.2
|
51 |
-
urllib3==2.3.0
|
52 |
-
setproctitle==1.3.4
|
53 |
-
tzdata==2025.1
|
54 |
-
sympy==1.13.1
|
55 |
-
pooch==1.8.2
|
56 |
-
click==8.1.8
|
57 |
-
pydantic_core==2.27.2
|
58 |
-
MarkupSafe==3.0.2
|
59 |
-
scipy==1.15.1
|
60 |
-
accelerate==1.3.0
|
61 |
-
tensorboard==2.19.0
|
62 |
-
protobuf==5.29.3
|
63 |
-
gitdb==4.0.12
|
64 |
-
smmap==5.0.2
|
65 |
-
absl-py==2.1.0
|
66 |
-
tqdm==4.67.1
|
67 |
-
yarl==1.18.3
|
68 |
-
pycparser==2.22
|
69 |
-
nvidia-cusparse-cu12==12.3.1.170
|
70 |
-
attrs==25.1.0
|
71 |
-
lazy_loader==0.4
|
72 |
-
tensorboard-data-server==0.7.2
|
73 |
-
threadpoolctl==3.5.0
|
74 |
-
GitPython==3.1.44
|
75 |
-
safetensors==0.5.2
|
76 |
-
fsspec==2024.12.0
|
77 |
-
nvidia-cuda-nvrtc-cu12==12.4.127
|
78 |
-
filelock==3.17.0
|
79 |
-
aiohappyeyeballs==2.4.6
|
80 |
-
packaging==24.2
|
81 |
-
datasets==3.2.1.dev0
|
82 |
-
audioread==3.0.1
|
83 |
-
propcache==0.2.1
|
84 |
-
transformers==4.49.0.dev0
|
85 |
-
nvidia-cuda-runtime-cu12==12.4.127
|
86 |
-
cffi==1.17.1
|
87 |
-
evaluate==0.4.3
|
88 |
-
Werkzeug==3.1.3
|
89 |
-
huggingface-hub==0.28.1
|
90 |
-
Jinja2==3.1.5
|
91 |
-
torch==2.6.0
|
92 |
-
nvidia-curand-cu12==10.3.5.147
|
93 |
-
xxhash==3.5.0
|
94 |
-
platformdirs==4.3.6
|
95 |
-
multidict==6.1.0
|
96 |
-
nvidia-cublas-cu12==12.4.5.8
|
97 |
-
nvidia-nvjitlink-cu12==12.4.127
|
98 |
-
triton==3.2.0
|
99 |
-
numba==0.61.0
|
100 |
-
importlib_metadata==8.0.0
|
101 |
-
platformdirs==4.2.2
|
102 |
-
typeguard==4.3.0
|
103 |
-
more-itertools==10.3.0
|
104 |
-
tomli==2.0.1
|
105 |
-
autocommand==2.2.2
|
106 |
-
zipp==3.19.2
|
107 |
-
typing_extensions==4.12.2
|
108 |
-
backports.tarfile==1.2.0
|
109 |
-
inflect==7.3.1
|
110 |
-
jaraco.text==3.12.1
|
111 |
-
wheel==0.43.0
|
112 |
-
packaging==24.2
|
113 |
-
jaraco.collections==5.1.0
|
114 |
-
jaraco.functools==4.0.1
|
115 |
-
jaraco.context==5.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
3 |
-
"python": "CPython 3.12.3",
|
4 |
-
"startedAt": "2025-02-12T12:59:24.816046Z",
|
5 |
-
"args": [
|
6 |
-
"--model_name_or_path=openai/whisper-small",
|
7 |
-
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
8 |
-
"--language=basque",
|
9 |
-
"--train_split_name=train",
|
10 |
-
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
11 |
-
"--model_index_name=Whisper Small Basque",
|
12 |
-
"--max_steps=8000",
|
13 |
-
"--output_dir=./",
|
14 |
-
"--per_device_train_batch_size=32",
|
15 |
-
"--per_device_eval_batch_size=16",
|
16 |
-
"--gradient_accumulation_steps=1",
|
17 |
-
"--logging_steps=25",
|
18 |
-
"--learning_rate=1e-5",
|
19 |
-
"--warmup_steps=500",
|
20 |
-
"--evaluation_strategy=steps",
|
21 |
-
"--eval_steps=1000",
|
22 |
-
"--save_strategy=steps",
|
23 |
-
"--save_steps=1000",
|
24 |
-
"--generation_max_length=225",
|
25 |
-
"--length_column_name=input_length",
|
26 |
-
"--max_duration_in_seconds=30",
|
27 |
-
"--text_column_name=sentence",
|
28 |
-
"--freeze_feature_encoder=False",
|
29 |
-
"--report_to=tensorboard",
|
30 |
-
"--metric_for_best_model=wer",
|
31 |
-
"--greater_is_better=False",
|
32 |
-
"--load_best_model_at_end",
|
33 |
-
"--gradient_checkpointing",
|
34 |
-
"--fp16",
|
35 |
-
"--overwrite_output_dir",
|
36 |
-
"--do_train",
|
37 |
-
"--do_eval",
|
38 |
-
"--predict_with_generate",
|
39 |
-
"--do_normalize_eval",
|
40 |
-
"--streaming",
|
41 |
-
"--use_auth_token",
|
42 |
-
"--push_to_hub",
|
43 |
-
"--report_to",
|
44 |
-
"wandb",
|
45 |
-
"--run_name",
|
46 |
-
"whisper-small-eu"
|
47 |
-
],
|
48 |
-
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
49 |
-
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
50 |
-
"git": {
|
51 |
-
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
52 |
-
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
53 |
-
},
|
54 |
-
"email": "[email protected]",
|
55 |
-
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
56 |
-
"host": "tknika",
|
57 |
-
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
58 |
-
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
59 |
-
"cpu_count": 8,
|
60 |
-
"cpu_count_logical": 8,
|
61 |
-
"gpu": "NVIDIA L40-48Q",
|
62 |
-
"gpu_count": 1,
|
63 |
-
"disk": {
|
64 |
-
"/": {
|
65 |
-
"total": "525987168256",
|
66 |
-
"used": "313777115136"
|
67 |
-
}
|
68 |
-
},
|
69 |
-
"memory": {
|
70 |
-
"total": "33654022144"
|
71 |
-
},
|
72 |
-
"cpu": {
|
73 |
-
"count": 8,
|
74 |
-
"countLogical": 8
|
75 |
-
},
|
76 |
-
"gpu_nvidia": [
|
77 |
-
{
|
78 |
-
"name": "NVIDIA L40-48Q",
|
79 |
-
"memoryTotal": "51539607552",
|
80 |
-
"cudaCores": 18176,
|
81 |
-
"architecture": "Ada"
|
82 |
-
}
|
83 |
-
],
|
84 |
-
"cudaVersion": "12.4"
|
85 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"_wandb":{"runtime":0}}
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:59:24.63359638Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa90v2n0h/port-226591.txt","pid":226591,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
2 |
-
{"time":"2025-02-12T12:59:24.673351851Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226591}
|
3 |
-
{"time":"2025-02-12T12:59:24.673324591Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41203,"Zone":""}}
|
4 |
-
{"time":"2025-02-12T12:59:24.809390061Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:53388"}
|
5 |
-
{"time":"2025-02-12T12:59:24.819517706Z","level":"INFO","msg":"handleInformInit: received","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
|
6 |
-
{"time":"2025-02-12T12:59:24.923364896Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
|
7 |
-
{"time":"2025-02-12T12:59:25.341856618Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:53388"}
|
8 |
-
{"time":"2025-02-12T12:59:25.341962867Z","level":"INFO","msg":"server is shutting down"}
|
9 |
-
{"time":"2025-02-12T12:59:25.341964847Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:53388"}
|
10 |
-
{"time":"2025-02-12T12:59:25.342139496Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:53388"}
|
11 |
-
{"time":"2025-02-12T12:59:25.569637185Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41203->127.0.0.1:53388: use of closed network connection","id":"127.0.0.1:53388"}
|
12 |
-
{"time":"2025-02-12T12:59:26.643739482Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:53388"}
|
13 |
-
{"time":"2025-02-12T12:59:26.643783881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:53388"}
|
14 |
-
{"time":"2025-02-12T12:59:26.643808411Z","level":"INFO","msg":"server is closed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
{"time":"2025-02-12T12:59:24.819689255Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log"}
|
2 |
-
{"time":"2025-02-12T12:59:24.923303396Z","level":"INFO","msg":"created new stream","id":"xhsgsxqq"}
|
3 |
-
{"time":"2025-02-12T12:59:24.923354596Z","level":"INFO","msg":"stream: started","id":"xhsgsxqq"}
|
4 |
-
{"time":"2025-02-12T12:59:24.923472545Z","level":"INFO","msg":"writer: Do: started","stream_id":"xhsgsxqq"}
|
5 |
-
{"time":"2025-02-12T12:59:24.923494475Z","level":"INFO","msg":"handler: started","stream_id":"xhsgsxqq"}
|
6 |
-
{"time":"2025-02-12T12:59:24.923560215Z","level":"INFO","msg":"sender: started","stream_id":"xhsgsxqq"}
|
7 |
-
{"time":"2025-02-12T12:59:25.192419842Z","level":"INFO","msg":"Starting system monitor"}
|
8 |
-
{"time":"2025-02-12T12:59:25.341944447Z","level":"INFO","msg":"stream: closing","id":"xhsgsxqq"}
|
9 |
-
{"time":"2025-02-12T12:59:25.341971537Z","level":"INFO","msg":"Stopping system monitor"}
|
10 |
-
{"time":"2025-02-12T12:59:25.342739283Z","level":"INFO","msg":"Stopped system monitor"}
|
11 |
-
{"time":"2025-02-12T12:59:26.408412135Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
12 |
-
{"time":"2025-02-12T12:59:26.643441283Z","level":"INFO","msg":"handler: closed","stream_id":"xhsgsxqq"}
|
13 |
-
{"time":"2025-02-12T12:59:26.643483513Z","level":"INFO","msg":"writer: Close: closed","stream_id":"xhsgsxqq"}
|
14 |
-
{"time":"2025-02-12T12:59:26.643525403Z","level":"INFO","msg":"sender: closed","stream_id":"xhsgsxqq"}
|
15 |
-
{"time":"2025-02-12T12:59:26.643566923Z","level":"INFO","msg":"stream: closed","id":"xhsgsxqq"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
2 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Configure stats pid to 226591
|
3 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
4 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
5 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
6 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
|
7 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
|
8 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():756] calling init triggers
|
9 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
10 |
-
config: {'_wandb': {}}
|
11 |
-
2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():789] starting backend
|
12 |
-
2025-02-12 12:59:24,809 INFO MainThread:226591 [wandb_init.py:init():793] sending inform_init request
|
13 |
-
2025-02-12 12:59:24,815 INFO MainThread:226591 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
14 |
-
2025-02-12 12:59:24,815 INFO MainThread:226591 [wandb_init.py:init():808] backend started and connected
|
15 |
-
2025-02-12 12:59:24,818 INFO MainThread:226591 [wandb_init.py:init():901] updated telemetry
|
16 |
-
2025-02-12 12:59:24,823 INFO MainThread:226591 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
17 |
-
2025-02-12 12:59:25,189 INFO MainThread:226591 [wandb_init.py:init():994] starting run threads in backend
|
18 |
-
2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_console_start():2385] atexit reg
|
19 |
-
2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
20 |
-
2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
21 |
-
2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2325] Redirects installed.
|
22 |
-
2025-02-12 12:59:25,303 INFO MainThread:226591 [wandb_init.py:init():1036] run started, returning control to user process
|
23 |
-
2025-02-12 12:59:25,304 INFO MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-58-59_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
24 |
-
2025-02-12 12:59:25,306 INFO MainThread:226591 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x76451d282f30>>
|
25 |
-
2025-02-12 12:59:25,306 INFO MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
26 |
-
2025-02-12 12:59:25,342 WARNING MsgRouterThr:226591 [router.py:message_loop():75] message_loop has been closed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb
DELETED
Binary file (11.3 kB)
|
|
wandb/run-20250212_130533-zeu6vay4/files/config.yaml
DELETED
@@ -1,512 +0,0 @@
|
|
1 |
-
_attn_implementation_autoset:
|
2 |
-
value: true
|
3 |
-
_name_or_path:
|
4 |
-
value: openai/whisper-small
|
5 |
-
_wandb:
|
6 |
-
value:
|
7 |
-
cli_version: 0.19.6
|
8 |
-
m:
|
9 |
-
- "1": train/global_step
|
10 |
-
"6":
|
11 |
-
- 3
|
12 |
-
"7": []
|
13 |
-
python_version: 3.12.3
|
14 |
-
t:
|
15 |
-
"1":
|
16 |
-
- 1
|
17 |
-
- 5
|
18 |
-
- 11
|
19 |
-
- 49
|
20 |
-
- 51
|
21 |
-
- 53
|
22 |
-
- 55
|
23 |
-
- 71
|
24 |
-
- 100
|
25 |
-
"2":
|
26 |
-
- 1
|
27 |
-
- 5
|
28 |
-
- 11
|
29 |
-
- 49
|
30 |
-
- 51
|
31 |
-
- 53
|
32 |
-
- 55
|
33 |
-
- 71
|
34 |
-
- 100
|
35 |
-
"3":
|
36 |
-
- 7
|
37 |
-
- 13
|
38 |
-
- 19
|
39 |
-
- 23
|
40 |
-
- 55
|
41 |
-
- 66
|
42 |
-
"4": 3.12.3
|
43 |
-
"5": 0.19.6
|
44 |
-
"6": 4.49.0.dev0
|
45 |
-
"8":
|
46 |
-
- 5
|
47 |
-
"9":
|
48 |
-
"1": transformers_trainer
|
49 |
-
"12": 0.19.6
|
50 |
-
"13": linux-x86_64
|
51 |
-
accelerator_config:
|
52 |
-
value:
|
53 |
-
dispatch_batches: null
|
54 |
-
even_batches: true
|
55 |
-
gradient_accumulation_kwargs: null
|
56 |
-
non_blocking: false
|
57 |
-
split_batches: false
|
58 |
-
use_seedable_sampler: true
|
59 |
-
activation_dropout:
|
60 |
-
value: 0
|
61 |
-
activation_function:
|
62 |
-
value: gelu
|
63 |
-
adafactor:
|
64 |
-
value: false
|
65 |
-
adam_beta1:
|
66 |
-
value: 0.9
|
67 |
-
adam_beta2:
|
68 |
-
value: 0.999
|
69 |
-
adam_epsilon:
|
70 |
-
value: 1e-08
|
71 |
-
add_cross_attention:
|
72 |
-
value: false
|
73 |
-
apply_spec_augment:
|
74 |
-
value: false
|
75 |
-
architectures:
|
76 |
-
value:
|
77 |
-
- WhisperForConditionalGeneration
|
78 |
-
attention_dropout:
|
79 |
-
value: 0
|
80 |
-
auto_find_batch_size:
|
81 |
-
value: false
|
82 |
-
average_tokens_across_devices:
|
83 |
-
value: false
|
84 |
-
bad_words_ids:
|
85 |
-
value: null
|
86 |
-
batch_eval_metrics:
|
87 |
-
value: false
|
88 |
-
begin_suppress_tokens:
|
89 |
-
value:
|
90 |
-
- 220
|
91 |
-
- 50257
|
92 |
-
bf16:
|
93 |
-
value: false
|
94 |
-
bf16_full_eval:
|
95 |
-
value: false
|
96 |
-
bos_token_id:
|
97 |
-
value: 50257
|
98 |
-
chunk_size_feed_forward:
|
99 |
-
value: 0
|
100 |
-
classifier_proj_size:
|
101 |
-
value: 256
|
102 |
-
cross_attention_hidden_size:
|
103 |
-
value: null
|
104 |
-
d_model:
|
105 |
-
value: 768
|
106 |
-
data_seed:
|
107 |
-
value: null
|
108 |
-
dataloader_drop_last:
|
109 |
-
value: false
|
110 |
-
dataloader_num_workers:
|
111 |
-
value: 0
|
112 |
-
dataloader_persistent_workers:
|
113 |
-
value: false
|
114 |
-
dataloader_pin_memory:
|
115 |
-
value: true
|
116 |
-
dataloader_prefetch_factor:
|
117 |
-
value: null
|
118 |
-
ddp_backend:
|
119 |
-
value: null
|
120 |
-
ddp_broadcast_buffers:
|
121 |
-
value: null
|
122 |
-
ddp_bucket_cap_mb:
|
123 |
-
value: null
|
124 |
-
ddp_find_unused_parameters:
|
125 |
-
value: null
|
126 |
-
ddp_timeout:
|
127 |
-
value: 1800
|
128 |
-
debug:
|
129 |
-
value: []
|
130 |
-
decoder_attention_heads:
|
131 |
-
value: 12
|
132 |
-
decoder_ffn_dim:
|
133 |
-
value: 3072
|
134 |
-
decoder_layerdrop:
|
135 |
-
value: 0
|
136 |
-
decoder_layers:
|
137 |
-
value: 12
|
138 |
-
decoder_start_token_id:
|
139 |
-
value: 50258
|
140 |
-
deepspeed:
|
141 |
-
value: null
|
142 |
-
disable_tqdm:
|
143 |
-
value: false
|
144 |
-
dispatch_batches:
|
145 |
-
value: null
|
146 |
-
diversity_penalty:
|
147 |
-
value: 0
|
148 |
-
do_eval:
|
149 |
-
value: true
|
150 |
-
do_predict:
|
151 |
-
value: false
|
152 |
-
do_sample:
|
153 |
-
value: false
|
154 |
-
do_train:
|
155 |
-
value: true
|
156 |
-
dropout:
|
157 |
-
value: 0
|
158 |
-
early_stopping:
|
159 |
-
value: false
|
160 |
-
encoder_attention_heads:
|
161 |
-
value: 12
|
162 |
-
encoder_ffn_dim:
|
163 |
-
value: 3072
|
164 |
-
encoder_layerdrop:
|
165 |
-
value: 0
|
166 |
-
encoder_layers:
|
167 |
-
value: 12
|
168 |
-
encoder_no_repeat_ngram_size:
|
169 |
-
value: 0
|
170 |
-
eos_token_id:
|
171 |
-
value: 50257
|
172 |
-
eval_accumulation_steps:
|
173 |
-
value: null
|
174 |
-
eval_delay:
|
175 |
-
value: 0
|
176 |
-
eval_do_concat_batches:
|
177 |
-
value: true
|
178 |
-
eval_on_start:
|
179 |
-
value: false
|
180 |
-
eval_steps:
|
181 |
-
value: 1000
|
182 |
-
eval_strategy:
|
183 |
-
value: steps
|
184 |
-
eval_use_gather_object:
|
185 |
-
value: false
|
186 |
-
evaluation_strategy:
|
187 |
-
value: steps
|
188 |
-
exponential_decay_length_penalty:
|
189 |
-
value: null
|
190 |
-
finetuning_task:
|
191 |
-
value: null
|
192 |
-
forced_bos_token_id:
|
193 |
-
value: null
|
194 |
-
forced_decoder_ids:
|
195 |
-
value: null
|
196 |
-
forced_eos_token_id:
|
197 |
-
value: null
|
198 |
-
fp16:
|
199 |
-
value: true
|
200 |
-
fp16_backend:
|
201 |
-
value: auto
|
202 |
-
fp16_full_eval:
|
203 |
-
value: false
|
204 |
-
fp16_opt_level:
|
205 |
-
value: O1
|
206 |
-
fsdp:
|
207 |
-
value: []
|
208 |
-
fsdp_config:
|
209 |
-
value:
|
210 |
-
min_num_params: 0
|
211 |
-
xla: false
|
212 |
-
xla_fsdp_grad_ckpt: false
|
213 |
-
xla_fsdp_v2: false
|
214 |
-
fsdp_min_num_params:
|
215 |
-
value: 0
|
216 |
-
fsdp_transformer_layer_cls_to_wrap:
|
217 |
-
value: null
|
218 |
-
full_determinism:
|
219 |
-
value: false
|
220 |
-
generation_config:
|
221 |
-
value: null
|
222 |
-
generation_max_length:
|
223 |
-
value: 225
|
224 |
-
generation_num_beams:
|
225 |
-
value: null
|
226 |
-
gradient_accumulation_steps:
|
227 |
-
value: 1
|
228 |
-
gradient_checkpointing:
|
229 |
-
value: true
|
230 |
-
gradient_checkpointing_kwargs:
|
231 |
-
value: null
|
232 |
-
greater_is_better:
|
233 |
-
value: false
|
234 |
-
group_by_length:
|
235 |
-
value: false
|
236 |
-
half_precision_backend:
|
237 |
-
value: auto
|
238 |
-
hub_always_push:
|
239 |
-
value: false
|
240 |
-
hub_model_id:
|
241 |
-
value: null
|
242 |
-
hub_private_repo:
|
243 |
-
value: null
|
244 |
-
hub_strategy:
|
245 |
-
value: every_save
|
246 |
-
hub_token:
|
247 |
-
value: <HUB_TOKEN>
|
248 |
-
id2label:
|
249 |
-
value:
|
250 |
-
"0": LABEL_0
|
251 |
-
"1": LABEL_1
|
252 |
-
ignore_data_skip:
|
253 |
-
value: false
|
254 |
-
include_for_metrics:
|
255 |
-
value: []
|
256 |
-
include_inputs_for_metrics:
|
257 |
-
value: false
|
258 |
-
include_num_input_tokens_seen:
|
259 |
-
value: false
|
260 |
-
include_tokens_per_second:
|
261 |
-
value: false
|
262 |
-
init_std:
|
263 |
-
value: 0.02
|
264 |
-
is_decoder:
|
265 |
-
value: false
|
266 |
-
is_encoder_decoder:
|
267 |
-
value: true
|
268 |
-
jit_mode_eval:
|
269 |
-
value: false
|
270 |
-
label_names:
|
271 |
-
value: null
|
272 |
-
label_smoothing_factor:
|
273 |
-
value: 0
|
274 |
-
label2id:
|
275 |
-
value:
|
276 |
-
LABEL_0: 0
|
277 |
-
LABEL_1: 1
|
278 |
-
learning_rate:
|
279 |
-
value: 1e-05
|
280 |
-
length_column_name:
|
281 |
-
value: input_length
|
282 |
-
length_penalty:
|
283 |
-
value: 1
|
284 |
-
load_best_model_at_end:
|
285 |
-
value: true
|
286 |
-
local_rank:
|
287 |
-
value: 0
|
288 |
-
log_level:
|
289 |
-
value: passive
|
290 |
-
log_level_replica:
|
291 |
-
value: warning
|
292 |
-
log_on_each_node:
|
293 |
-
value: true
|
294 |
-
logging_dir:
|
295 |
-
value: ./runs/Feb12_13-05-10_tknika
|
296 |
-
logging_first_step:
|
297 |
-
value: false
|
298 |
-
logging_nan_inf_filter:
|
299 |
-
value: true
|
300 |
-
logging_steps:
|
301 |
-
value: 25
|
302 |
-
logging_strategy:
|
303 |
-
value: steps
|
304 |
-
lr_scheduler_type:
|
305 |
-
value: linear
|
306 |
-
mask_feature_length:
|
307 |
-
value: 10
|
308 |
-
mask_feature_min_masks:
|
309 |
-
value: 0
|
310 |
-
mask_feature_prob:
|
311 |
-
value: 0
|
312 |
-
mask_time_length:
|
313 |
-
value: 10
|
314 |
-
mask_time_min_masks:
|
315 |
-
value: 2
|
316 |
-
mask_time_prob:
|
317 |
-
value: 0.05
|
318 |
-
max_grad_norm:
|
319 |
-
value: 1
|
320 |
-
max_length:
|
321 |
-
value: 448
|
322 |
-
max_source_positions:
|
323 |
-
value: 1500
|
324 |
-
max_steps:
|
325 |
-
value: 8000
|
326 |
-
max_target_positions:
|
327 |
-
value: 448
|
328 |
-
median_filter_width:
|
329 |
-
value: 7
|
330 |
-
metric_for_best_model:
|
331 |
-
value: wer
|
332 |
-
min_length:
|
333 |
-
value: 0
|
334 |
-
model/num_parameters:
|
335 |
-
value: 241734912
|
336 |
-
model_type:
|
337 |
-
value: whisper
|
338 |
-
mp_parameters:
|
339 |
-
value: ""
|
340 |
-
neftune_noise_alpha:
|
341 |
-
value: null
|
342 |
-
no_cuda:
|
343 |
-
value: false
|
344 |
-
no_repeat_ngram_size:
|
345 |
-
value: 0
|
346 |
-
num_beam_groups:
|
347 |
-
value: 1
|
348 |
-
num_beams:
|
349 |
-
value: 1
|
350 |
-
num_hidden_layers:
|
351 |
-
value: 12
|
352 |
-
num_mel_bins:
|
353 |
-
value: 80
|
354 |
-
num_return_sequences:
|
355 |
-
value: 1
|
356 |
-
num_train_epochs:
|
357 |
-
value: 3
|
358 |
-
optim:
|
359 |
-
value: adamw_torch
|
360 |
-
optim_args:
|
361 |
-
value: null
|
362 |
-
optim_target_modules:
|
363 |
-
value: null
|
364 |
-
output_attentions:
|
365 |
-
value: false
|
366 |
-
output_dir:
|
367 |
-
value: ./
|
368 |
-
output_hidden_states:
|
369 |
-
value: false
|
370 |
-
output_scores:
|
371 |
-
value: false
|
372 |
-
overwrite_output_dir:
|
373 |
-
value: true
|
374 |
-
pad_token_id:
|
375 |
-
value: 50257
|
376 |
-
past_index:
|
377 |
-
value: -1
|
378 |
-
per_device_eval_batch_size:
|
379 |
-
value: 16
|
380 |
-
per_device_train_batch_size:
|
381 |
-
value: 32
|
382 |
-
per_gpu_eval_batch_size:
|
383 |
-
value: null
|
384 |
-
per_gpu_train_batch_size:
|
385 |
-
value: null
|
386 |
-
predict_with_generate:
|
387 |
-
value: true
|
388 |
-
prediction_loss_only:
|
389 |
-
value: false
|
390 |
-
prefix:
|
391 |
-
value: null
|
392 |
-
problem_type:
|
393 |
-
value: null
|
394 |
-
push_to_hub:
|
395 |
-
value: true
|
396 |
-
push_to_hub_model_id:
|
397 |
-
value: null
|
398 |
-
push_to_hub_organization:
|
399 |
-
value: null
|
400 |
-
push_to_hub_token:
|
401 |
-
value: <PUSH_TO_HUB_TOKEN>
|
402 |
-
ray_scope:
|
403 |
-
value: last
|
404 |
-
remove_invalid_values:
|
405 |
-
value: false
|
406 |
-
remove_unused_columns:
|
407 |
-
value: true
|
408 |
-
repetition_penalty:
|
409 |
-
value: 1
|
410 |
-
report_to:
|
411 |
-
value:
|
412 |
-
- wandb
|
413 |
-
restore_callback_states_from_checkpoint:
|
414 |
-
value: false
|
415 |
-
resume_from_checkpoint:
|
416 |
-
value: null
|
417 |
-
return_dict:
|
418 |
-
value: true
|
419 |
-
return_dict_in_generate:
|
420 |
-
value: false
|
421 |
-
run_name:
|
422 |
-
value: whisper-small-eu
|
423 |
-
save_on_each_node:
|
424 |
-
value: false
|
425 |
-
save_only_model:
|
426 |
-
value: false
|
427 |
-
save_safetensors:
|
428 |
-
value: true
|
429 |
-
save_steps:
|
430 |
-
value: 1000
|
431 |
-
save_strategy:
|
432 |
-
value: steps
|
433 |
-
save_total_limit:
|
434 |
-
value: null
|
435 |
-
scale_embedding:
|
436 |
-
value: false
|
437 |
-
seed:
|
438 |
-
value: 42
|
439 |
-
sep_token_id:
|
440 |
-
value: null
|
441 |
-
skip_memory_metrics:
|
442 |
-
value: true
|
443 |
-
sortish_sampler:
|
444 |
-
value: false
|
445 |
-
split_batches:
|
446 |
-
value: null
|
447 |
-
suppress_tokens:
|
448 |
-
value: null
|
449 |
-
task_specific_params:
|
450 |
-
value: null
|
451 |
-
temperature:
|
452 |
-
value: 1
|
453 |
-
tf_legacy_loss:
|
454 |
-
value: false
|
455 |
-
tf32:
|
456 |
-
value: null
|
457 |
-
tie_encoder_decoder:
|
458 |
-
value: false
|
459 |
-
tie_word_embeddings:
|
460 |
-
value: true
|
461 |
-
tokenizer_class:
|
462 |
-
value: null
|
463 |
-
top_k:
|
464 |
-
value: 50
|
465 |
-
top_p:
|
466 |
-
value: 1
|
467 |
-
torch_compile:
|
468 |
-
value: false
|
469 |
-
torch_compile_backend:
|
470 |
-
value: null
|
471 |
-
torch_compile_mode:
|
472 |
-
value: null
|
473 |
-
torch_dtype:
|
474 |
-
value: float32
|
475 |
-
torch_empty_cache_steps:
|
476 |
-
value: null
|
477 |
-
torchdynamo:
|
478 |
-
value: null
|
479 |
-
torchscript:
|
480 |
-
value: false
|
481 |
-
tpu_metrics_debug:
|
482 |
-
value: false
|
483 |
-
tpu_num_cores:
|
484 |
-
value: null
|
485 |
-
transformers_version:
|
486 |
-
value: 4.49.0.dev0
|
487 |
-
typical_p:
|
488 |
-
value: 1
|
489 |
-
use_bfloat16:
|
490 |
-
value: false
|
491 |
-
use_cache:
|
492 |
-
value: false
|
493 |
-
use_cpu:
|
494 |
-
value: false
|
495 |
-
use_ipex:
|
496 |
-
value: false
|
497 |
-
use_legacy_prediction_loop:
|
498 |
-
value: false
|
499 |
-
use_liger_kernel:
|
500 |
-
value: false
|
501 |
-
use_mps_device:
|
502 |
-
value: false
|
503 |
-
use_weighted_layer_sum:
|
504 |
-
value: false
|
505 |
-
vocab_size:
|
506 |
-
value: 51865
|
507 |
-
warmup_ratio:
|
508 |
-
value: 0
|
509 |
-
warmup_steps:
|
510 |
-
value: 500
|
511 |
-
weight_decay:
|
512 |
-
value: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wandb/run-20250212_130533-zeu6vay4/files/output.log
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
2 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 633, in <module>
|
3 |
-
main()
|
4 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 582, in main
|
5 |
-
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
6 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
7 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
8 |
-
return inner_training_loop(
|
9 |
-
^^^^^^^^^^^^^^^^^^^^
|
10 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
11 |
-
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
12 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
13 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
14 |
-
return self.call_event("on_epoch_begin", args, state, control)
|
15 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
16 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
17 |
-
result = getattr(callback, event)(
|
18 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
19 |
-
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
|
20 |
-
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
21 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
22 |
-
AttributeError: 'NoneType' object has no attribute 'dataset'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|