diff --git a/.gitignore b/.gitignore
index 0d763d77e0bd715a70781df9cbd287e6819c63ed..98c8233d456d2b5b8468791fb8ac8e89204c9c49 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 checkpoint-*/
 nohup.out
 .venv/
+wandb/
diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
deleted file mode 100644
index 3e76e3e81a1059aef65345804a4f269bcd9183ad..0000000000000000000000000000000000000000
--- a/wandb/debug-internal.log
+++ /dev/null
@@ -1,7 +0,0 @@
-{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
-{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
diff --git a/wandb/debug.log b/wandb/debug.log
deleted file mode 100644
index d6ccd57adcf2bbe370a51abbc03e1e4a2718eb85..0000000000000000000000000000000000000000
--- a/wandb/debug.log
+++ /dev/null
@@ -1,25 +0,0 @@
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():756] calling init triggers
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():789] starting backend
-2025-02-12 15:27:10,107 INFO    MainThread:243546 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 15:27:10,112 INFO    MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 15:27:10,113 INFO    MainThread:243546 [wandb_init.py:init():808] backend started and connected
-2025-02-12 15:27:10,115 INFO    MainThread:243546 [wandb_init.py:init():901] updated telemetry
-2025-02-12 15:27:10,122 INFO    MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 15:27:10,584 INFO    MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 15:27:10,691 INFO    MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 15:27:10,694 INFO    MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 15:27:10,698 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
-2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/config.yaml b/wandb/run-20250212_121751-d4i88lzt/files/config.yaml
deleted file mode 100644
index 043453cafc3d9969981ef61b7a8be2e92734597b..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_12-17-27_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/output.log b/wandb/run-20250212_121751-d4i88lzt/files/output.log
deleted file mode 100644
index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt b/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json b/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json
deleted file mode 100644
index 1c4f87be68cf8cc40c16f58b87fc77156484b279..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T12:17:51.527114Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "297346564096"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json b/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log
deleted file mode 100644
index d6af67f7efa449508164027a6273196ce78339b0..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392}
-{"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}}
-{"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"}
-{"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
deleted file mode 100644
index 26894375e49df56758efcfe21e6d3c1198d1f1c3..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"}
-{"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"}
-{"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"}
diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
deleted file mode 100644
index 9d9b0f45e81af07737809add0aa564e5a82d4e9b..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
-2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_init.py:init():756] calling init triggers
-2025-02-12 12:17:51,312 INFO    MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 12:17:51,312 INFO    MainThread:223392 [wandb_init.py:init():789] starting backend
-2025-02-12 12:17:51,521 INFO    MainThread:223392 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 12:17:51,526 INFO    MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 12:17:51,526 INFO    MainThread:223392 [wandb_init.py:init():808] backend started and connected
-2025-02-12 12:17:51,528 INFO    MainThread:223392 [wandb_init.py:init():901] updated telemetry
-2025-02-12 12:17:51,535 INFO    MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 12:17:51,944 INFO    MainThread:223392 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 12:17:52,051 INFO    MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 12:17:52,052 INFO    MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 12:17:52,054 INFO    MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75ef87e92c00>>
-2025-02-12 12:17:52,055 INFO    MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb b/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb
deleted file mode 100644
index 22a915c2f93dba32f6fb4cb19cdad8d41103ad8f..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb and /dev/null differ
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml b/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml
deleted file mode 100644
index 0190eccc1f197e04acf36d5f4461aa7e993e6582..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_12-26-11_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/output.log b/wandb/run-20250212_122637-v3d3ouvn/files/output.log
deleted file mode 100644
index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt b/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json
deleted file mode 100644
index 09e834199f3ff4987252a9c26cf0f4e0a17ac89c..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T12:26:37.277902Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "297346666496"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log
deleted file mode 100644
index a2c451b0fdc549443d1d6b56f3138b2f0778d2da..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110}
-{"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}}
-{"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"}
-{"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
deleted file mode 100644
index 836534c70d6b5a016b47367347a114c8262f9db4..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"}
-{"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"}
-{"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"}
diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
deleted file mode 100644
index 651a7c89fd0b1c415f299466295634e0fb5f97ae..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:init():756] calling init triggers
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:init():789] starting backend
-2025-02-12 12:26:37,272 INFO    MainThread:224110 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 12:26:37,277 INFO    MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 12:26:37,277 INFO    MainThread:224110 [wandb_init.py:init():808] backend started and connected
-2025-02-12 12:26:37,279 INFO    MainThread:224110 [wandb_init.py:init():901] updated telemetry
-2025-02-12 12:26:37,285 INFO    MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 12:26:37,653 INFO    MainThread:224110 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 12:26:37,764 INFO    MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 12:26:37,765 INFO    MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 12:26:37,765 INFO    MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 12:26:37,765 INFO    MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 12:26:37,766 INFO    MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 12:26:37,767 INFO    MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 12:26:37,770 INFO    MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7a5cbc15a330>>
-2025-02-12 12:26:37,770 INFO    MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb b/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb
deleted file mode 100644
index ec09d574bec753b9c13409f7b715d91402a72733..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb and /dev/null differ
diff --git a/wandb/run-20250212_122854-4m048f5s/files/config.yaml b/wandb/run-20250212_122854-4m048f5s/files/config.yaml
deleted file mode 100644
index cd2427648bdf8f30b7a5b1a74e995772423700bd..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_12-28-29_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_122854-4m048f5s/files/output.log b/wandb/run-20250212_122854-4m048f5s/files/output.log
deleted file mode 100644
index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_122854-4m048f5s/files/requirements.txt b/wandb/run-20250212_122854-4m048f5s/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json b/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json
deleted file mode 100644
index f63869dc081394dcbd07bd84335642df549da86e..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T12:28:54.528397Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "297346756608"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json b/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log b/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log
deleted file mode 100644
index 0a340626ec035668304ac5b99a523d0e9b994b99..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528}
-{"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}}
-{"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"}
-{"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log b/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
deleted file mode 100644
index 12bd0e04a87c9e69aaa4351910eaad3205df4abf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"}
-{"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"}
-{"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"}
-{"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"}
-{"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"}
-{"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"}
-{"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"}
-{"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"}
-{"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"}
-{"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"}
-{"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"}
diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug.log b/wandb/run-20250212_122854-4m048f5s/logs/debug.log
deleted file mode 100644
index 54eaf255ca03d83dfe297f954d06f7be79378056..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_122854-4m048f5s/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:init():756] calling init triggers
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:init():789] starting backend
-2025-02-12 12:28:54,521 INFO    MainThread:224528 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 12:28:54,527 INFO    MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 12:28:54,528 INFO    MainThread:224528 [wandb_init.py:init():808] backend started and connected
-2025-02-12 12:28:54,530 INFO    MainThread:224528 [wandb_init.py:init():901] updated telemetry
-2025-02-12 12:28:54,537 INFO    MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 12:28:54,883 INFO    MainThread:224528 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 12:28:54,988 INFO    MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 12:28:54,989 INFO    MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 12:28:54,989 INFO    MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 12:28:54,989 INFO    MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 12:28:54,990 INFO    MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 12:28:54,991 INFO    MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 12:28:54,995 INFO    MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x71c5f6c57cb0>>
-2025-02-12 12:28:54,995 INFO    MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb b/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb
deleted file mode 100644
index d6e2b9a990f433fbef578c9ebdfb5b9a71ba6c24..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb and /dev/null differ
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml b/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml
deleted file mode 100644
index fda1ff6a94e5d574407cad68313478699d3b809c..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_12-51-48_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/output.log b/wandb/run-20250212_125202-c6xjc1gs/files/output.log
deleted file mode 100644
index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt b/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json
deleted file mode 100644
index de67d54e80183d40bb763b73a71aa662e9c26e2d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T12:52:03.105234Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=test_parl",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777016832"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log
deleted file mode 100644
index b2a91d838478809786e569e4b108333296f1c838..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112}
-{"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}}
-{"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"}
-{"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
deleted file mode 100644
index e92e3fbe32b1a1557ac2822b511c40cd3bec6edc..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"}
-{"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"}
-{"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"}
diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
deleted file mode 100644
index f7f6579dfe6ece92d42e8341a45478b9a1a5d112..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 12:52:02,886 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:init():756] calling init triggers
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:init():789] starting backend
-2025-02-12 12:52:03,097 INFO    MainThread:226112 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 12:52:03,104 INFO    MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 12:52:03,104 INFO    MainThread:226112 [wandb_init.py:init():808] backend started and connected
-2025-02-12 12:52:03,107 INFO    MainThread:226112 [wandb_init.py:init():901] updated telemetry
-2025-02-12 12:52:03,114 INFO    MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 12:52:03,483 INFO    MainThread:226112 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 12:52:03,568 INFO    MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 12:52:03,569 INFO    MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 12:52:03,571 INFO    MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7d4830f2ddf0>>
-2025-02-12 12:52:03,571 INFO    MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb b/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb
deleted file mode 100644
index 2f75b3e73a217b18b87ae8966776fa45be09c53d..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb and /dev/null differ
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml b/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml
deleted file mode 100644
index 07f41facb74690088d1d0b07a0c5bd2e9cb291f4..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_12-58-59_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/output.log b/wandb/run-20250212_125924-xhsgsxqq/files/output.log
deleted file mode 100644
index 0e757b553e4758f4d7cd9dbeb4450a0b166b1880..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 632, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt b/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json
deleted file mode 100644
index 6df8aff999de9e30b89c5c5cc1a7450cf021e67a..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T12:59:24.816046Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777115136"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log
deleted file mode 100644
index b7598f14ab3c8cbb4c8d36057ac70a206f4ddfed..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T12:59:24.63359638Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa90v2n0h/port-226591.txt","pid":226591,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T12:59:24.673351851Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226591}
-{"time":"2025-02-12T12:59:24.673324591Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41203,"Zone":""}}
-{"time":"2025-02-12T12:59:24.809390061Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:24.819517706Z","level":"INFO","msg":"handleInformInit: received","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:24.923364896Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:25.341856618Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:25.341962867Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T12:59:25.341964847Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:25.342139496Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:25.569637185Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41203->127.0.0.1:53388: use of closed network connection","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:26.643739482Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:26.643783881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:53388"}
-{"time":"2025-02-12T12:59:26.643808411Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
deleted file mode 100644
index 26b5602b5006fa339037330ef9890aa5ee369829..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T12:59:24.819689255Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log"}
-{"time":"2025-02-12T12:59:24.923303396Z","level":"INFO","msg":"created new stream","id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:24.923354596Z","level":"INFO","msg":"stream: started","id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:24.923472545Z","level":"INFO","msg":"writer: Do: started","stream_id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:24.923494475Z","level":"INFO","msg":"handler: started","stream_id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:24.923560215Z","level":"INFO","msg":"sender: started","stream_id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:25.192419842Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T12:59:25.341944447Z","level":"INFO","msg":"stream: closing","id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:25.341971537Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T12:59:25.342739283Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T12:59:26.408412135Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T12:59:26.643441283Z","level":"INFO","msg":"handler: closed","stream_id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:26.643483513Z","level":"INFO","msg":"writer: Close: closed","stream_id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:26.643525403Z","level":"INFO","msg":"sender: closed","stream_id":"xhsgsxqq"}
-{"time":"2025-02-12T12:59:26.643566923Z","level":"INFO","msg":"stream: closed","id":"xhsgsxqq"}
diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
deleted file mode 100644
index bc26d42fc8a709f9cfcfa6a7dbdbbc82be8f50e9..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Configure stats pid to 226591
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:init():756] calling init triggers
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:init():789] starting backend
-2025-02-12 12:59:24,809 INFO    MainThread:226591 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 12:59:24,815 INFO    MainThread:226591 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 12:59:24,815 INFO    MainThread:226591 [wandb_init.py:init():808] backend started and connected
-2025-02-12 12:59:24,818 INFO    MainThread:226591 [wandb_init.py:init():901] updated telemetry
-2025-02-12 12:59:24,823 INFO    MainThread:226591 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 12:59:25,189 INFO    MainThread:226591 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 12:59:25,303 INFO    MainThread:226591 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 12:59:25,304 INFO    MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-58-59_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 12:59:25,306 INFO    MainThread:226591 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x76451d282f30>>
-2025-02-12 12:59:25,306 INFO    MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 12:59:25,342 WARNING MsgRouterThr:226591 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb b/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb
deleted file mode 100644
index 1b53de295e68266af60d83bca74b9be0894b34e8..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb and /dev/null differ
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/config.yaml b/wandb/run-20250212_130533-zeu6vay4/files/config.yaml
deleted file mode 100644
index 420595717bd915426e2e2cab93fdb5e08bd589cb..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_13-05-10_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/output.log b/wandb/run-20250212_130533-zeu6vay4/files/output.log
deleted file mode 100644
index 769cfff2bf9a545350da2d3c1f0974308181aefb..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 633, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 582, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt b/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json b/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json
deleted file mode 100644
index aebfc577d2e88050d8085f734bc8f66f785f3040..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T13:05:34.019960Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777221632"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json b/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log
deleted file mode 100644
index 87fbf64e8666318f6e5b43dae2d01a140fc24ead..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T13:05:33.837553705Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvxrga7tq/port-227738.txt","pid":227738,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T13:05:33.841824082Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":227738}
-{"time":"2025-02-12T13:05:33.841805772Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41271,"Zone":""}}
-{"time":"2025-02-12T13:05:34.013327792Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:34.022772969Z","level":"INFO","msg":"handleInformInit: received","streamId":"zeu6vay4","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:34.128884037Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"zeu6vay4","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:34.567948471Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:34.568036341Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:34.568049191Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T13:05:34.568283769Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:34.807984453Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41271->127.0.0.1:36972: use of closed network connection","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:35.861897508Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:35.861922138Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:36972"}
-{"time":"2025-02-12T13:05:35.861946917Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log
deleted file mode 100644
index d3fe88aa2e519427abbec9fea34b1053792e9916..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T13:05:34.023147337Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log"}
-{"time":"2025-02-12T13:05:34.128801887Z","level":"INFO","msg":"created new stream","id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:34.128873947Z","level":"INFO","msg":"stream: started","id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:34.128926027Z","level":"INFO","msg":"writer: Do: started","stream_id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:34.128988376Z","level":"INFO","msg":"sender: started","stream_id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:34.129040036Z","level":"INFO","msg":"handler: started","stream_id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:34.419229803Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T13:05:34.568056211Z","level":"INFO","msg":"stream: closing","id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:34.56812972Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T13:05:34.568907056Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T13:05:35.64406902Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T13:05:35.861616759Z","level":"INFO","msg":"handler: closed","stream_id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:35.861676279Z","level":"INFO","msg":"writer: Close: closed","stream_id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:35.861688929Z","level":"INFO","msg":"sender: closed","stream_id":"zeu6vay4"}
-{"time":"2025-02-12T13:05:35.861776488Z","level":"INFO","msg":"stream: closed","id":"zeu6vay4"}
diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug.log
deleted file mode 100644
index 9b2ee9877baf73f9ef90a46ae8c6af7f3f198927..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_130533-zeu6vay4/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Configure stats pid to 227738
-2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug.log
-2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log
-2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:init():756] calling init triggers
-2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:init():789] starting backend
-2025-02-12 13:05:34,013 INFO    MainThread:227738 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 13:05:34,019 INFO    MainThread:227738 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 13:05:34,019 INFO    MainThread:227738 [wandb_init.py:init():808] backend started and connected
-2025-02-12 13:05:34,021 INFO    MainThread:227738 [wandb_init.py:init():901] updated telemetry
-2025-02-12 13:05:34,029 INFO    MainThread:227738 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 13:05:34,416 INFO    MainThread:227738 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 13:05:34,529 INFO    MainThread:227738 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 13:05:34,530 INFO    MainThread:227738 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-05-10_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 13:05:34,533 INFO    MainThread:227738 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x79ded964ef90>>
-2025-02-12 13:05:34,533 INFO    MainThread:227738 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 13:05:34,568 WARNING MsgRouterThr:227738 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb b/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb
deleted file mode 100644
index 7901333681792ad6d6f54878666ba62ed5e18337..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb and /dev/null differ
diff --git a/wandb/run-20250212_131820-cnos968u/files/config.yaml b/wandb/run-20250212_131820-cnos968u/files/config.yaml
deleted file mode 100644
index 95906302a5990edf8b7625c376bbe1acd0b580a0..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_13-17-51_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_131820-cnos968u/files/output.log b/wandb/run-20250212_131820-cnos968u/files/output.log
deleted file mode 100644
index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_131820-cnos968u/files/requirements.txt b/wandb/run-20250212_131820-cnos968u/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json b/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json
deleted file mode 100644
index 90498074d4382bf401e4eae2589b05d71bb571fe..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T13:18:20.315500Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777364992"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json b/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug-core.log b/wandb/run-20250212_131820-cnos968u/logs/debug-core.log
deleted file mode 100644
index c6146a7673a983023bddad8f6b42c8bee05d89af..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T13:18:20.13270486Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoihqtf7_/port-228562.txt","pid":228562,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T13:18:20.138326066Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":228562}
-{"time":"2025-02-12T13:18:20.138290606Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43223,"Zone":""}}
-{"time":"2025-02-12T13:18:20.311508128Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:20.318120198Z","level":"INFO","msg":"handleInformInit: received","streamId":"cnos968u","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:20.423169259Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"cnos968u","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:20.829667746Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:20.829731656Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:20.829793855Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T13:18:20.829860655Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:20.996850774Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:43223->127.0.0.1:59824: use of closed network connection","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:22.315588426Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:22.315614045Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:59824"}
-{"time":"2025-02-12T13:18:22.315638895Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log b/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log
deleted file mode 100644
index 8906a5937e9d4dc75937d8ef3efaf8e6c19bc30f..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T13:18:20.318385167Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug-core.log"}
-{"time":"2025-02-12T13:18:20.423108789Z","level":"INFO","msg":"created new stream","id":"cnos968u"}
-{"time":"2025-02-12T13:18:20.423149939Z","level":"INFO","msg":"stream: started","id":"cnos968u"}
-{"time":"2025-02-12T13:18:20.423187669Z","level":"INFO","msg":"writer: Do: started","stream_id":"cnos968u"}
-{"time":"2025-02-12T13:18:20.423274918Z","level":"INFO","msg":"sender: started","stream_id":"cnos968u"}
-{"time":"2025-02-12T13:18:20.423278528Z","level":"INFO","msg":"handler: started","stream_id":"cnos968u"}
-{"time":"2025-02-12T13:18:20.691599029Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T13:18:20.829801165Z","level":"INFO","msg":"stream: closing","id":"cnos968u"}
-{"time":"2025-02-12T13:18:20.829838505Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T13:18:20.830448261Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T13:18:22.102558325Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T13:18:22.315316527Z","level":"INFO","msg":"handler: closed","stream_id":"cnos968u"}
-{"time":"2025-02-12T13:18:22.315380557Z","level":"INFO","msg":"writer: Close: closed","stream_id":"cnos968u"}
-{"time":"2025-02-12T13:18:22.315415207Z","level":"INFO","msg":"sender: closed","stream_id":"cnos968u"}
-{"time":"2025-02-12T13:18:22.315473316Z","level":"INFO","msg":"stream: closed","id":"cnos968u"}
diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug.log b/wandb/run-20250212_131820-cnos968u/logs/debug.log
deleted file mode 100644
index 39be2fccb1e90ae66cd4e4477210e91974f70b36..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_131820-cnos968u/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Configure stats pid to 228562
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug.log
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:init():756] calling init triggers
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:init():789] starting backend
-2025-02-12 13:18:20,311 INFO    MainThread:228562 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 13:18:20,315 INFO    MainThread:228562 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 13:18:20,315 INFO    MainThread:228562 [wandb_init.py:init():808] backend started and connected
-2025-02-12 13:18:20,316 INFO    MainThread:228562 [wandb_init.py:init():901] updated telemetry
-2025-02-12 13:18:20,320 INFO    MainThread:228562 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 13:18:20,688 INFO    MainThread:228562 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 13:18:20,791 INFO    MainThread:228562 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 13:18:20,792 INFO    MainThread:228562 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-17-51_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 13:18:20,794 INFO    MainThread:228562 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7c8ab3472b40>>
-2025-02-12 13:18:20,794 INFO    MainThread:228562 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 13:18:20,830 WARNING MsgRouterThr:228562 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb b/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb
deleted file mode 100644
index 8fd59480179eef903cc9efb265ca12fb88f77992..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb and /dev/null differ
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml b/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml
deleted file mode 100644
index cf75123251a142c3bb6a48006c0bdfb4679249b3..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_13-49-16_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/output.log b/wandb/run-20250212_134942-5ywh9vkd/files/output.log
deleted file mode 100644
index b9e4b6c313316dc48b344f88dc5473c4de1f1088..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 631, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 580, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt b/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json
deleted file mode 100644
index 3da9f22f6e027ebd1af1c6145cfa29e6643c9e3d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T13:49:42.549340Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777541120"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log
deleted file mode 100644
index 14ea2e58769ad59628cf9dcf7f7d3c3cd69e16ea..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T13:49:42.368539349Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcs75h_7n/port-230104.txt","pid":230104,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T13:49:42.376031144Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230104}
-{"time":"2025-02-12T13:49:42.375994744Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46317,"Zone":""}}
-{"time":"2025-02-12T13:49:42.545856407Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:42.550347793Z","level":"INFO","msg":"handleInformInit: received","streamId":"5ywh9vkd","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:42.653585761Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"5ywh9vkd","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:43.065873804Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:43.065944244Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:43.065999603Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T13:49:43.066118552Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:43.307941987Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:46317->127.0.0.1:58840: use of closed network connection","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:44.336718599Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:44.336762259Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:58840"}
-{"time":"2025-02-12T13:49:44.336780169Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log
deleted file mode 100644
index 958ba48035f615c72447b59df82dad6ed3e33a35..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T13:49:42.550471882Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log"}
-{"time":"2025-02-12T13:49:42.653534801Z","level":"INFO","msg":"created new stream","id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:42.653576741Z","level":"INFO","msg":"stream: started","id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:42.653711879Z","level":"INFO","msg":"handler: started","stream_id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:42.653689329Z","level":"INFO","msg":"writer: Do: started","stream_id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:42.653750879Z","level":"INFO","msg":"sender: started","stream_id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:42.915738751Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T13:49:43.065978643Z","level":"INFO","msg":"stream: closing","id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:43.066102462Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T13:49:43.066991893Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T13:49:44.121293278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T13:49:44.336369943Z","level":"INFO","msg":"handler: closed","stream_id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:44.336433982Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:44.336490092Z","level":"INFO","msg":"sender: closed","stream_id":"5ywh9vkd"}
-{"time":"2025-02-12T13:49:44.336537621Z","level":"INFO","msg":"stream: closed","id":"5ywh9vkd"}
diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log
deleted file mode 100644
index daf9c305c6a9112d08089604436fe99d07693e41..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Configure stats pid to 230104
-2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log
-2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log
-2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:init():756] calling init triggers
-2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:init():789] starting backend
-2025-02-12 13:49:42,545 INFO    MainThread:230104 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 13:49:42,548 INFO    MainThread:230104 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 13:49:42,549 INFO    MainThread:230104 [wandb_init.py:init():808] backend started and connected
-2025-02-12 13:49:42,550 INFO    MainThread:230104 [wandb_init.py:init():901] updated telemetry
-2025-02-12 13:49:42,553 INFO    MainThread:230104 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 13:49:42,912 INFO    MainThread:230104 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 13:49:43,027 INFO    MainThread:230104 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 13:49:43,028 INFO    MainThread:230104 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-49-16_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 13:49:43,031 INFO    MainThread:230104 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x701149d824e0>>
-2025-02-12 13:49:43,031 INFO    MainThread:230104 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 13:49:43,066 WARNING MsgRouterThr:230104 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb b/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb
deleted file mode 100644
index d95543eeebd7c7ded69dd770ef822456843f59fd..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb and /dev/null differ
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml b/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml
deleted file mode 100644
index 1f1ca62c76dba92301a01886cb09958ac5a1f9f8..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_13-51-27_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/output.log b/wandb/run-20250212_135151-5m7b3lhr/files/output.log
deleted file mode 100644
index da5340cfe9cc02a517c9de0cdb66c052b387beab..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/files/output.log
+++ /dev/null
@@ -1,23 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-Inside on_epoch_begin - train_dataloader: None
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 631, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 580, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 556, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt b/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json
deleted file mode 100644
index 3e914497e6936a06c61ef66438821f8fa5b99f36..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T13:51:51.496687Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777639424"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log
deleted file mode 100644
index 16c1fd0e3b169fe267030e00e4d3679cb9c42a52..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T13:51:51.3124141Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcyi8bfs3/port-230520.txt","pid":230520,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T13:51:51.317857259Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230520}
-{"time":"2025-02-12T13:51:51.317819419Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41833,"Zone":""}}
-{"time":"2025-02-12T13:51:51.490296524Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:51.49928487Z","level":"INFO","msg":"handleInformInit: received","streamId":"5m7b3lhr","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:51.60503634Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"5m7b3lhr","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:52.040899031Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:52.0409399Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:52.04094382Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T13:51:52.04102198Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:52.213258576Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41833->127.0.0.1:51804: use of closed network connection","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:53.364540267Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:53.364555197Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51804"}
-{"time":"2025-02-12T13:51:53.364566507Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log
deleted file mode 100644
index b9aa4b44922e1558b02d06d29a822d654f48137f..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T13:51:51.499619967Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log"}
-{"time":"2025-02-12T13:51:51.604967821Z","level":"INFO","msg":"created new stream","id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:51.60502639Z","level":"INFO","msg":"stream: started","id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:51.605130129Z","level":"INFO","msg":"writer: Do: started","stream_id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:51.605216728Z","level":"INFO","msg":"handler: started","stream_id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:51.605315277Z","level":"INFO","msg":"sender: started","stream_id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:51.888376389Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T13:51:52.0410007Z","level":"INFO","msg":"stream: closing","id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:52.041038759Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T13:51:52.041736053Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T13:51:53.021189887Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T13:51:53.364332489Z","level":"INFO","msg":"handler: closed","stream_id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:53.364389748Z","level":"INFO","msg":"sender: closed","stream_id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:53.364389018Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5m7b3lhr"}
-{"time":"2025-02-12T13:51:53.364475307Z","level":"INFO","msg":"stream: closed","id":"5m7b3lhr"}
diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log
deleted file mode 100644
index a7e7ed2dbfef267db590321ad3622db7e9cf3621..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Configure stats pid to 230520
-2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log
-2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log
-2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:init():756] calling init triggers
-2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:init():789] starting backend
-2025-02-12 13:51:51,490 INFO    MainThread:230520 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 13:51:51,496 INFO    MainThread:230520 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 13:51:51,496 INFO    MainThread:230520 [wandb_init.py:init():808] backend started and connected
-2025-02-12 13:51:51,498 INFO    MainThread:230520 [wandb_init.py:init():901] updated telemetry
-2025-02-12 13:51:51,505 INFO    MainThread:230520 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 13:51:51,885 INFO    MainThread:230520 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 13:51:52,003 INFO    MainThread:230520 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 13:51:52,004 INFO    MainThread:230520 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-51-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 13:51:52,006 INFO    MainThread:230520 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x74295c46d5e0>>
-2025-02-12 13:51:52,006 INFO    MainThread:230520 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 13:51:52,041 WARNING MsgRouterThr:230520 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb b/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb
deleted file mode 100644
index 90536287bc7c440e01774fadf1beb280c4a85b3e..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb and /dev/null differ
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/config.yaml b/wandb/run-20250212_135331-x29lgb1q/files/config.yaml
deleted file mode 100644
index 140e3770741b2dbcca969ef317059023c5bc500e..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_13-53-04_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/output.log b/wandb/run-20250212_135331-x29lgb1q/files/output.log
deleted file mode 100644
index 93558c333a13dd8d66af57c288bf367d12bd83cc..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/files/output.log
+++ /dev/null
@@ -1,2 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]
-Inside on_epoch_begin - train_dataloader: None
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/requirements.txt b/wandb/run-20250212_135331-x29lgb1q/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/wandb-metadata.json b/wandb/run-20250212_135331-x29lgb1q/files/wandb-metadata.json
deleted file mode 100644
index a90e752ec5892415734ab62b5958cbe20968d2e8..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T13:53:31.254517Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777729536"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/wandb-summary.json b/wandb/run-20250212_135331-x29lgb1q/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log
deleted file mode 100644
index 40c619c4d224bffb127309ef90b7284f8468e0d0..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T13:53:31.070943678Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpnzj0vsxp/port-230894.txt","pid":230894,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T13:53:31.105857563Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230894}
-{"time":"2025-02-12T13:53:31.105978192Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39599,"Zone":""}}
-{"time":"2025-02-12T13:53:31.2489183Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:31.257512053Z","level":"INFO","msg":"handleInformInit: received","streamId":"x29lgb1q","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:31.363529712Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"x29lgb1q","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:31.886789826Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:31.886893335Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:31.886966894Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T13:53:31.887054423Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:32.110768467Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:39599->127.0.0.1:43984: use of closed network connection","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:33.114823214Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:33.114850744Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:43984"}
-{"time":"2025-02-12T13:53:33.114865414Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log
deleted file mode 100644
index dd1c8e4964b1bf449f85380ffb5d3a89ca81e194..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T13:53:31.257882961Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log"}
-{"time":"2025-02-12T13:53:31.363436433Z","level":"INFO","msg":"created new stream","id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:31.363519102Z","level":"INFO","msg":"stream: started","id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:31.363646481Z","level":"INFO","msg":"writer: Do: started","stream_id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:31.36374892Z","level":"INFO","msg":"sender: started","stream_id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:31.363851899Z","level":"INFO","msg":"handler: started","stream_id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:31.741270453Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T13:53:31.886903155Z","level":"INFO","msg":"stream: closing","id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:31.886939864Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T13:53:31.887754548Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T13:53:32.909722722Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T13:53:33.114562596Z","level":"INFO","msg":"handler: closed","stream_id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:33.114621616Z","level":"INFO","msg":"writer: Close: closed","stream_id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:33.114647856Z","level":"INFO","msg":"sender: closed","stream_id":"x29lgb1q"}
-{"time":"2025-02-12T13:53:33.114708545Z","level":"INFO","msg":"stream: closed","id":"x29lgb1q"}
diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug.log
deleted file mode 100644
index 305941888c14c138a20e3bb8a3a09cb3ea14fc13..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135331-x29lgb1q/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Configure stats pid to 230894
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug.log
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:init():756] calling init triggers
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:init():789] starting backend
-2025-02-12 13:53:31,248 INFO    MainThread:230894 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 13:53:31,254 INFO    MainThread:230894 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 13:53:31,254 INFO    MainThread:230894 [wandb_init.py:init():808] backend started and connected
-2025-02-12 13:53:31,255 INFO    MainThread:230894 [wandb_init.py:init():901] updated telemetry
-2025-02-12 13:53:31,260 INFO    MainThread:230894 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 13:53:31,738 INFO    MainThread:230894 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 13:53:31,846 INFO    MainThread:230894 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 13:53:31,847 INFO    MainThread:230894 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 13:53:31,847 INFO    MainThread:230894 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 13:53:31,847 INFO    MainThread:230894 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 13:53:31,848 INFO    MainThread:230894 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 13:53:31,849 INFO    MainThread:230894 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-53-04_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 13:53:31,852 INFO    MainThread:230894 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x72c09ca6eed0>>
-2025-02-12 13:53:31,852 INFO    MainThread:230894 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 13:53:31,887 WARNING MsgRouterThr:230894 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb b/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb
deleted file mode 100644
index 4dde8c9fffacb3c18e046c0f82aeb24874408d5a..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb and /dev/null differ
diff --git a/wandb/run-20250212_135435-53evlis5/files/config.yaml b/wandb/run-20250212_135435-53evlis5/files/config.yaml
deleted file mode 100644
index 30f65cde960f35c69b0bdf95f4b9d9d2b3b1184f..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_13-54-12_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_135435-53evlis5/files/output.log b/wandb/run-20250212_135435-53evlis5/files/output.log
deleted file mode 100644
index 93558c333a13dd8d66af57c288bf367d12bd83cc..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/files/output.log
+++ /dev/null
@@ -1,2 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]
-Inside on_epoch_begin - train_dataloader: None
diff --git a/wandb/run-20250212_135435-53evlis5/files/requirements.txt b/wandb/run-20250212_135435-53evlis5/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135435-53evlis5/files/wandb-metadata.json b/wandb/run-20250212_135435-53evlis5/files/wandb-metadata.json
deleted file mode 100644
index c9b338ad3442a65b2ed7fea55d58accc80964997..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T13:54:36.201204Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777815552"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_135435-53evlis5/files/wandb-summary.json b/wandb/run-20250212_135435-53evlis5/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug-core.log b/wandb/run-20250212_135435-53evlis5/logs/debug-core.log
deleted file mode 100644
index 63b9ba146e9102da80c2f2f7f97d0353b6c79877..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T13:54:36.018759837Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpojka6x19/port-231248.txt","pid":231248,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T13:54:36.02418464Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":231248}
-{"time":"2025-02-12T13:54:36.02415448Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46325,"Zone":""}}
-{"time":"2025-02-12T13:54:36.194658966Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:36.203723827Z","level":"INFO","msg":"handleInformInit: received","streamId":"53evlis5","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:36.309920346Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"53evlis5","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:36.758732366Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:36.758792086Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:36.758889215Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T13:54:36.758938304Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:36.979693541Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:46325->127.0.0.1:35906: use of closed network connection","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:38.005429072Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:38.005459881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35906"}
-{"time":"2025-02-12T13:54:38.005482981Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log b/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log
deleted file mode 100644
index 12101c30e8dcabd6c545c213d9abcb5c366354b2..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T13:54:36.204103004Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug-core.log"}
-{"time":"2025-02-12T13:54:36.309862037Z","level":"INFO","msg":"created new stream","id":"53evlis5"}
-{"time":"2025-02-12T13:54:36.309910267Z","level":"INFO","msg":"stream: started","id":"53evlis5"}
-{"time":"2025-02-12T13:54:36.310009206Z","level":"INFO","msg":"writer: Do: started","stream_id":"53evlis5"}
-{"time":"2025-02-12T13:54:36.310099665Z","level":"INFO","msg":"sender: started","stream_id":"53evlis5"}
-{"time":"2025-02-12T13:54:36.310319183Z","level":"INFO","msg":"handler: started","stream_id":"53evlis5"}
-{"time":"2025-02-12T13:54:36.609964328Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T13:54:36.758890355Z","level":"INFO","msg":"stream: closing","id":"53evlis5"}
-{"time":"2025-02-12T13:54:36.758937814Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T13:54:36.759635628Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T13:54:37.773850697Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T13:54:38.005111685Z","level":"INFO","msg":"handler: closed","stream_id":"53evlis5"}
-{"time":"2025-02-12T13:54:38.005175764Z","level":"INFO","msg":"sender: closed","stream_id":"53evlis5"}
-{"time":"2025-02-12T13:54:38.005166594Z","level":"INFO","msg":"writer: Close: closed","stream_id":"53evlis5"}
-{"time":"2025-02-12T13:54:38.005321073Z","level":"INFO","msg":"stream: closed","id":"53evlis5"}
diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug.log b/wandb/run-20250212_135435-53evlis5/logs/debug.log
deleted file mode 100644
index 5f43982224cb3297178d00e7d017b3a59158840e..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135435-53evlis5/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Configure stats pid to 231248
-2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug.log
-2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log
-2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:init():756] calling init triggers
-2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:init():789] starting backend
-2025-02-12 13:54:36,194 INFO    MainThread:231248 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 13:54:36,200 INFO    MainThread:231248 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 13:54:36,200 INFO    MainThread:231248 [wandb_init.py:init():808] backend started and connected
-2025-02-12 13:54:36,203 INFO    MainThread:231248 [wandb_init.py:init():901] updated telemetry
-2025-02-12 13:54:36,210 INFO    MainThread:231248 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 13:54:36,606 INFO    MainThread:231248 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 13:54:36,720 INFO    MainThread:231248 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 13:54:36,721 INFO    MainThread:231248 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-54-12_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 13:54:36,723 INFO    MainThread:231248 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7e080a0953a0>>
-2025-02-12 13:54:36,723 INFO    MainThread:231248 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 13:54:36,758 WARNING MsgRouterThr:231248 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb b/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb
deleted file mode 100644
index 7ec7ba20b62cc55f53b817904fd528e0aeac4066..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb and /dev/null differ
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml b/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml
deleted file mode 100644
index c51c497794483c0b0380f14431b8680dc5f7ba9b..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_13-56-06_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/output.log b/wandb/run-20250212_135631-dnrqwgd0/files/output.log
deleted file mode 100644
index 0c7c41f13b26424a300a12e79a0240a7acd5a6a5..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/files/output.log
+++ /dev/null
@@ -1,23 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-Inside on_epoch_begin - train_dataloader: None
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 632, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt b/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json
deleted file mode 100644
index 65ae49a1584b5c0324cbf98ee13156b2a98ec35e..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T13:56:31.423900Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313777905664"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log
deleted file mode 100644
index b147dcc35bf777332920dc3f3ca4421b26256461..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T13:56:31.233734243Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmp_1p0wl/port-231645.txt","pid":231645,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T13:56:31.237208984Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":231645}
-{"time":"2025-02-12T13:56:31.237177324Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38409,"Zone":""}}
-{"time":"2025-02-12T13:56:31.418128107Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:31.426328298Z","level":"INFO","msg":"handleInformInit: received","streamId":"dnrqwgd0","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:31.532291862Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"dnrqwgd0","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:31.979710518Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:31.979771727Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:31.979834686Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T13:56:31.979890276Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:32.195706105Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:38409->127.0.0.1:60084: use of closed network connection","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:33.231929311Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:33.231969701Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:60084"}
-{"time":"2025-02-12T13:56:33.231990701Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log
deleted file mode 100644
index dd5c1ea629fb01670d3aa33b82105add3e58fdbf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T13:56:31.426643885Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log"}
-{"time":"2025-02-12T13:56:31.532231333Z","level":"INFO","msg":"created new stream","id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:31.532282222Z","level":"INFO","msg":"stream: started","id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:31.532401851Z","level":"INFO","msg":"writer: Do: started","stream_id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:31.532436711Z","level":"INFO","msg":"sender: started","stream_id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:31.53251893Z","level":"INFO","msg":"handler: started","stream_id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:31.831057361Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T13:56:31.979836506Z","level":"INFO","msg":"stream: closing","id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:31.979949235Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T13:56:31.981148335Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T13:56:33.006463404Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T13:56:33.231629384Z","level":"INFO","msg":"handler: closed","stream_id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:33.231703643Z","level":"INFO","msg":"writer: Close: closed","stream_id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:33.231748163Z","level":"INFO","msg":"sender: closed","stream_id":"dnrqwgd0"}
-{"time":"2025-02-12T13:56:33.231782443Z","level":"INFO","msg":"stream: closed","id":"dnrqwgd0"}
diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log
deleted file mode 100644
index e628857ea58f73e686450dd9fd9e0941437c4aac..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Configure stats pid to 231645
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:init():756] calling init triggers
-2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 13:56:31,209 INFO    MainThread:231645 [wandb_init.py:init():789] starting backend
-2025-02-12 13:56:31,417 INFO    MainThread:231645 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 13:56:31,423 INFO    MainThread:231645 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 13:56:31,423 INFO    MainThread:231645 [wandb_init.py:init():808] backend started and connected
-2025-02-12 13:56:31,425 INFO    MainThread:231645 [wandb_init.py:init():901] updated telemetry
-2025-02-12 13:56:31,430 INFO    MainThread:231645 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 13:56:31,828 INFO    MainThread:231645 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 13:56:31,941 INFO    MainThread:231645 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 13:56:31,942 INFO    MainThread:231645 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-56-06_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 13:56:31,944 INFO    MainThread:231645 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7bfc4be86ff0>>
-2025-02-12 13:56:31,945 INFO    MainThread:231645 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 13:56:31,980 WARNING MsgRouterThr:231645 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb b/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb
deleted file mode 100644
index f4fa1b56219f3fbe8b9ded10000ea6f8e8711b5b..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb and /dev/null differ
diff --git a/wandb/run-20250212_140456-85d9ssit/files/config.yaml b/wandb/run-20250212_140456-85d9ssit/files/config.yaml
deleted file mode 100644
index 20459048deba1181e28d01917b6ee58a0b19c371..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_14-04-28_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_140456-85d9ssit/files/output.log b/wandb/run-20250212_140456-85d9ssit/files/output.log
deleted file mode 100644
index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/files/output.log
+++ /dev/null
@@ -1,22 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
-    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
-    return self.call_event("on_epoch_begin", args, state, control)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
-    result = getattr(callback, event)(
-             ^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
-    if isinstance(train_dataloader.dataset, IterableDatasetShard):
-                  ^^^^^^^^^^^^^^^^^^^^^^^^
-AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_140456-85d9ssit/files/requirements.txt b/wandb/run-20250212_140456-85d9ssit/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json b/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json
deleted file mode 100644
index 609a64191cc1e6ded5bd0a4031c0e83b27c08926..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json
+++ /dev/null
@@ -1,87 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T14:04:56.751445Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--audio_column_name=audio",
-    "--text_column_name=sentence",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "313778016256"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json b/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json
deleted file mode 100644
index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log b/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log
deleted file mode 100644
index f601fa971eb613b8b904449c275d1827295ba786..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{"time":"2025-02-12T14:04:56.567564578Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqs28ml67/port-232359.txt","pid":232359,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T14:04:56.573119086Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":232359}
-{"time":"2025-02-12T14:04:56.573060477Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41429,"Zone":""}}
-{"time":"2025-02-12T14:04:56.745144471Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:56.753547786Z","level":"INFO","msg":"handleInformInit: received","streamId":"85d9ssit","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:56.859061499Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"85d9ssit","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:57.327873486Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:57.327950506Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:57.327989686Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T14:04:57.328056845Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:57.543980132Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41429->127.0.0.1:35380: use of closed network connection","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:58.65202789Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:58.65205631Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35380"}
-{"time":"2025-02-12T14:04:58.65210661Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log b/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log
deleted file mode 100644
index 34dfbdb326b60005e9dada05ae305ad436dfd321..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T14:04:56.753826604Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log"}
-{"time":"2025-02-12T14:04:56.859010159Z","level":"INFO","msg":"created new stream","id":"85d9ssit"}
-{"time":"2025-02-12T14:04:56.859052399Z","level":"INFO","msg":"stream: started","id":"85d9ssit"}
-{"time":"2025-02-12T14:04:56.859127668Z","level":"INFO","msg":"writer: Do: started","stream_id":"85d9ssit"}
-{"time":"2025-02-12T14:04:56.859226827Z","level":"INFO","msg":"sender: started","stream_id":"85d9ssit"}
-{"time":"2025-02-12T14:04:56.859302168Z","level":"INFO","msg":"handler: started","stream_id":"85d9ssit"}
-{"time":"2025-02-12T14:04:57.172644512Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T14:04:57.327994966Z","level":"INFO","msg":"stream: closing","id":"85d9ssit"}
-{"time":"2025-02-12T14:04:57.328025695Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T14:04:57.328790509Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T14:04:58.425349995Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T14:04:58.651670973Z","level":"INFO","msg":"handler: closed","stream_id":"85d9ssit"}
-{"time":"2025-02-12T14:04:58.651730382Z","level":"INFO","msg":"sender: closed","stream_id":"85d9ssit"}
-{"time":"2025-02-12T14:04:58.651712323Z","level":"INFO","msg":"writer: Close: closed","stream_id":"85d9ssit"}
-{"time":"2025-02-12T14:04:58.651855241Z","level":"INFO","msg":"stream: closed","id":"85d9ssit"}
diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug.log b/wandb/run-20250212_140456-85d9ssit/logs/debug.log
deleted file mode 100644
index 1164b0f163d2f03c5598f4e9dbd5df03ddd05535..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_140456-85d9ssit/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Configure stats pid to 232359
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug.log
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:init():756] calling init triggers
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:init():789] starting backend
-2025-02-12 14:04:56,745 INFO    MainThread:232359 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 14:04:56,750 INFO    MainThread:232359 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 14:04:56,751 INFO    MainThread:232359 [wandb_init.py:init():808] backend started and connected
-2025-02-12 14:04:56,753 INFO    MainThread:232359 [wandb_init.py:init():901] updated telemetry
-2025-02-12 14:04:56,760 INFO    MainThread:232359 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 14:04:57,169 INFO    MainThread:232359 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 14:04:57,289 INFO    MainThread:232359 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 14:04:57,290 INFO    MainThread:232359 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-04-28_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 14:04:57,292 INFO    MainThread:232359 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7fea6eea2e70>>
-2025-02-12 14:04:57,292 INFO    MainThread:232359 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 14:04:57,328 WARNING MsgRouterThr:232359 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb b/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb
deleted file mode 100644
index 6aecf6528222a0b9ca923222cc04215f30e7b215..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb and /dev/null differ
diff --git a/wandb/run-20250212_144814-onbjaexn/files/config.yaml b/wandb/run-20250212_144814-onbjaexn/files/config.yaml
deleted file mode 100644
index e7bf2050c408e1c177d2cf067168af9e543d1374..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_14-47-46_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_144814-onbjaexn/files/output.log b/wandb/run-20250212_144814-onbjaexn/files/output.log
deleted file mode 100644
index 3a117abe7df89f3665e8ab773843e3452ad54bec..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/files/output.log
+++ /dev/null
@@ -1,49 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 635, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 584, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
-    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
-                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
-    batch_samples += [next(epoch_iterator)]
-                      ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
-    next_batch, next_batch_info = self._fetch_batches(main_iterator)
-                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
-    batches.append(next(iterator))
-                   ^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
-    data = self._next_data()
-           ^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
-    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
-    data.append(next(self.dataset_iter))
-                ^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
-    for key, example in ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
-    for key, example in iterator:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
-    for key, example in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
-    for x in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
-    processed_inputs = self.function(*function_args, **self.fn_kwargs)
-                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 474, in prepare_dataset
-    inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
-                               ~~~~~~^^^^^^^^^
-KeyError: 'array'
diff --git a/wandb/run-20250212_144814-onbjaexn/files/requirements.txt b/wandb/run-20250212_144814-onbjaexn/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json b/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json
deleted file mode 100644
index 3617f33b6a9e0d7a88e02c955cc5ba8d7786fe2e..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T14:48:14.426245Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "314421264384"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json b/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json
deleted file mode 100644
index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log b/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log
deleted file mode 100644
index 6129efa42871aad3a2eaf1f4ed957947fcadfc81..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log
+++ /dev/null
@@ -1,13 +0,0 @@
-{"time":"2025-02-12T14:48:14.241936287Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpn0iuoxdb/port-235726.txt","pid":235726,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T14:48:14.269915432Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":235726}
-{"time":"2025-02-12T14:48:14.269970272Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44879,"Zone":""}}
-{"time":"2025-02-12T14:48:14.42013725Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:14.429435808Z","level":"INFO","msg":"handleInformInit: received","streamId":"onbjaexn","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:14.534605813Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"onbjaexn","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:17.23040957Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:17.230519299Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:17.230528979Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T14:48:17.230598258Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:18.137682794Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:18.137715544Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:38426"}
-{"time":"2025-02-12T14:48:18.137735074Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log b/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log
deleted file mode 100644
index 20bf3cf198ac1918e999652096a9c93e415993c3..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T14:48:14.429796675Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log"}
-{"time":"2025-02-12T14:48:14.534527814Z","level":"INFO","msg":"created new stream","id":"onbjaexn"}
-{"time":"2025-02-12T14:48:14.534596853Z","level":"INFO","msg":"stream: started","id":"onbjaexn"}
-{"time":"2025-02-12T14:48:14.534694833Z","level":"INFO","msg":"writer: Do: started","stream_id":"onbjaexn"}
-{"time":"2025-02-12T14:48:14.534760432Z","level":"INFO","msg":"handler: started","stream_id":"onbjaexn"}
-{"time":"2025-02-12T14:48:14.534942391Z","level":"INFO","msg":"sender: started","stream_id":"onbjaexn"}
-{"time":"2025-02-12T14:48:14.842151491Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T14:48:17.230483349Z","level":"INFO","msg":"stream: closing","id":"onbjaexn"}
-{"time":"2025-02-12T14:48:17.230507079Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T14:48:17.231180494Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T14:48:17.906287174Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T14:48:18.137280277Z","level":"INFO","msg":"handler: closed","stream_id":"onbjaexn"}
-{"time":"2025-02-12T14:48:18.137369966Z","level":"INFO","msg":"writer: Close: closed","stream_id":"onbjaexn"}
-{"time":"2025-02-12T14:48:18.137418376Z","level":"INFO","msg":"sender: closed","stream_id":"onbjaexn"}
-{"time":"2025-02-12T14:48:18.137549935Z","level":"INFO","msg":"stream: closed","id":"onbjaexn"}
diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug.log b/wandb/run-20250212_144814-onbjaexn/logs/debug.log
deleted file mode 100644
index ef4069012800c296bd0c14d5ed4d73d0c2467c96..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_144814-onbjaexn/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Configure stats pid to 235726
-2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug.log
-2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log
-2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:init():756] calling init triggers
-2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:init():789] starting backend
-2025-02-12 14:48:14,419 INFO    MainThread:235726 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 14:48:14,425 INFO    MainThread:235726 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 14:48:14,425 INFO    MainThread:235726 [wandb_init.py:init():808] backend started and connected
-2025-02-12 14:48:14,428 INFO    MainThread:235726 [wandb_init.py:init():901] updated telemetry
-2025-02-12 14:48:14,434 INFO    MainThread:235726 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 14:48:14,839 INFO    MainThread:235726 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 14:48:14,951 INFO    MainThread:235726 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 14:48:14,953 INFO    MainThread:235726 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-47-46_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 14:48:14,955 INFO    MainThread:235726 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x766a6ba7a9f0>>
-2025-02-12 14:48:14,955 INFO    MainThread:235726 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 14:48:17,230 WARNING MsgRouterThr:235726 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb b/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb
deleted file mode 100644
index 3bcda516eefb5fc320ac07ed1aa34f5c893f6e18..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb and /dev/null differ
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/config.yaml b/wandb/run-20250212_145250-7h6sh6az/files/config.yaml
deleted file mode 100644
index 1659e855217bf787e28b7c6fcdf6754048d4f34e..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_14-52-23_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/output.log b/wandb/run-20250212_145250-7h6sh6az/files/output.log
deleted file mode 100644
index ed56b0930f3380f5d2cd8b2ba4660678179dbb05..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/files/output.log
+++ /dev/null
@@ -1,52 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 657, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 606, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
-    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
-                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
-    batch_samples += [next(epoch_iterator)]
-                      ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
-    next_batch, next_batch_info = self._fetch_batches(main_iterator)
-                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
-    batches.append(next(iterator))
-                   ^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
-    data = self._next_data()
-           ^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
-    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
-    data.append(next(self.dataset_iter))
-                ^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
-    for key, example in ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
-    for key, example in iterator:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
-    for key, example in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
-    for x in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
-    processed_inputs = self.function(*function_args, **self.fn_kwargs)
-                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 492, in prepare_dataset
-    inputs = feature_extractor(audio_array, sampling_rate=sampling_rate)
-             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/models/whisper/feature_extraction_whisper.py", line 265, in __call__
-    raw_speech = np.asarray(raw_speech, dtype=np.float32)
-                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-ValueError: could not convert string to float: 'common_voice_eu_39287311.wav'
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt b/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json b/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json
deleted file mode 100644
index e0270308edb884f1317d7a8c8ecd6d03846cb42d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T14:52:51.028960Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "315195543552"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json b/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json
deleted file mode 100644
index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log
deleted file mode 100644
index bdc472ce22920952bc84d9ac978de3754c21a2ea..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log
+++ /dev/null
@@ -1,13 +0,0 @@
-{"time":"2025-02-12T14:52:50.845987197Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp6ead6ms8/port-236505.txt","pid":236505,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T14:52:50.851144401Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":236505}
-{"time":"2025-02-12T14:52:50.851121011Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45803,"Zone":""}}
-{"time":"2025-02-12T14:52:51.022520498Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:51.031293581Z","level":"INFO","msg":"handleInformInit: received","streamId":"7h6sh6az","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:51.13681882Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"7h6sh6az","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:53.567639763Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:53.567714252Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:53.567766542Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T14:52:53.567883362Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:54.608402958Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:54.608420568Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:43038"}
-{"time":"2025-02-12T14:52:54.608430008Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log
deleted file mode 100644
index 193e726955b8de9172c8a8da3854bacc0b3770ef..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T14:52:51.031699779Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log"}
-{"time":"2025-02-12T14:52:51.13677806Z","level":"INFO","msg":"created new stream","id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:51.13681223Z","level":"INFO","msg":"stream: started","id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:51.13682766Z","level":"INFO","msg":"writer: Do: started","stream_id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:51.136887979Z","level":"INFO","msg":"handler: started","stream_id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:51.136996279Z","level":"INFO","msg":"sender: started","stream_id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:51.40503864Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T14:52:53.567741802Z","level":"INFO","msg":"stream: closing","id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:53.567786892Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T14:52:53.56844074Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T14:52:54.38014778Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T14:52:54.608180109Z","level":"INFO","msg":"handler: closed","stream_id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:54.608239299Z","level":"INFO","msg":"writer: Close: closed","stream_id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:54.608252109Z","level":"INFO","msg":"sender: closed","stream_id":"7h6sh6az"}
-{"time":"2025-02-12T14:52:54.608324618Z","level":"INFO","msg":"stream: closed","id":"7h6sh6az"}
diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug.log
deleted file mode 100644
index 9e0a5b7af23b12f958416746291b2016b27b0920..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145250-7h6sh6az/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Configure stats pid to 236505
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug.log
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:init():756] calling init triggers
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:init():789] starting backend
-2025-02-12 14:52:51,022 INFO    MainThread:236505 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 14:52:51,028 INFO    MainThread:236505 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 14:52:51,028 INFO    MainThread:236505 [wandb_init.py:init():808] backend started and connected
-2025-02-12 14:52:51,030 INFO    MainThread:236505 [wandb_init.py:init():901] updated telemetry
-2025-02-12 14:52:51,037 INFO    MainThread:236505 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 14:52:51,402 INFO    MainThread:236505 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 14:52:51,521 INFO    MainThread:236505 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 14:52:51,522 INFO    MainThread:236505 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-52-23_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 14:52:51,524 INFO    MainThread:236505 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7289d4692540>>
-2025-02-12 14:52:51,524 INFO    MainThread:236505 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 14:52:53,567 WARNING MsgRouterThr:236505 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb b/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb
deleted file mode 100644
index 11838bad33d6e4a1a00df232467c3bd278b754a5..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb and /dev/null differ
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml b/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml
deleted file mode 100644
index 1aee5532694c9c3a16dde77aeb5d364ddac0c141..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_14-54-21_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/output.log b/wandb/run-20250212_145446-ncnr0yzu/files/output.log
deleted file mode 100644
index f01eb94ffe96faade97a93f96968111bebc6ef65..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/files/output.log
+++ /dev/null
@@ -1,52 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 653, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 602, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
-    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
-                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
-    batch_samples += [next(epoch_iterator)]
-                      ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
-    next_batch, next_batch_info = self._fetch_batches(main_iterator)
-                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
-    batches.append(next(iterator))
-                   ^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
-    data = self._next_data()
-           ^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
-    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
-    data.append(next(self.dataset_iter))
-                ^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
-    for key, example in ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
-    for key, example in iterator:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
-    for key, example in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
-    for x in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
-    processed_inputs = self.function(*function_args, **self.fn_kwargs)
-                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 484, in prepare_dataset
-    inputs = feature_extractor(batch[audio_column_name], sampling_rate=feature_extractor.sampling_rate)
-             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/models/whisper/feature_extraction_whisper.py", line 265, in __call__
-    raw_speech = np.asarray(raw_speech, dtype=np.float32)
-                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-TypeError: float() argument must be a string or a real number, not 'dict'
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt b/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json
deleted file mode 100644
index 5f58184ad4e6e2757e4e20e41ecb4b819e7e4daf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T14:54:46.573889Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "315195682816"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json
deleted file mode 100644
index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log
deleted file mode 100644
index db5aa803baede20483e71c9d07c375e3db38e6fd..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log
+++ /dev/null
@@ -1,13 +0,0 @@
-{"time":"2025-02-12T14:54:46.391038145Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpn5cre9oi/port-236985.txt","pid":236985,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T14:54:46.395903327Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":236985}
-{"time":"2025-02-12T14:54:46.395861037Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38029,"Zone":""}}
-{"time":"2025-02-12T14:54:46.567245341Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:46.576515207Z","level":"INFO","msg":"handleInformInit: received","streamId":"ncnr0yzu","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:46.683228559Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ncnr0yzu","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:48.66373831Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:48.66385373Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T14:54:48.66383821Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:48.663953679Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:49.591574304Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:49.591599923Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:46988"}
-{"time":"2025-02-12T14:54:49.591615933Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log
deleted file mode 100644
index 4e45c5ab16d921728ca58083736e7bfa341dd2e1..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T14:54:46.576935865Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log"}
-{"time":"2025-02-12T14:54:46.683143569Z","level":"INFO","msg":"created new stream","id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:46.683218799Z","level":"INFO","msg":"stream: started","id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:46.683354688Z","level":"INFO","msg":"writer: Do: started","stream_id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:46.683407538Z","level":"INFO","msg":"sender: started","stream_id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:46.683417878Z","level":"INFO","msg":"handler: started","stream_id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:46.986318334Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T14:54:48.66385817Z","level":"INFO","msg":"stream: closing","id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:48.66390572Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T14:54:48.664605317Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T14:54:49.357544434Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T14:54:49.591242244Z","level":"INFO","msg":"handler: closed","stream_id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:49.591302134Z","level":"INFO","msg":"writer: Close: closed","stream_id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:49.591331464Z","level":"INFO","msg":"sender: closed","stream_id":"ncnr0yzu"}
-{"time":"2025-02-12T14:54:49.591428814Z","level":"INFO","msg":"stream: closed","id":"ncnr0yzu"}
diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log
deleted file mode 100644
index d1847e519db8f0646fddd46ec510ca163b76e2ca..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Configure stats pid to 236985
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:init():756] calling init triggers
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:init():789] starting backend
-2025-02-12 14:54:46,567 INFO    MainThread:236985 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 14:54:46,573 INFO    MainThread:236985 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 14:54:46,573 INFO    MainThread:236985 [wandb_init.py:init():808] backend started and connected
-2025-02-12 14:54:46,575 INFO    MainThread:236985 [wandb_init.py:init():901] updated telemetry
-2025-02-12 14:54:46,582 INFO    MainThread:236985 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 14:54:46,982 INFO    MainThread:236985 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 14:54:47,099 INFO    MainThread:236985 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 14:54:47,100 INFO    MainThread:236985 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-54-21_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 14:54:47,102 INFO    MainThread:236985 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7844fa86aff0>>
-2025-02-12 14:54:47,102 INFO    MainThread:236985 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 14:54:48,664 WARNING MsgRouterThr:236985 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb b/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb
deleted file mode 100644
index 65c0bd3fd32e7d2796b16127fa20b73e04cb090e..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb and /dev/null differ
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml b/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml
deleted file mode 100644
index 1b8c28c55668d6406f48f51123b6234e497287be..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml
+++ /dev/null
@@ -1,512 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_14-58-28_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/output.log b/wandb/run-20250212_145852-0gfsy6hh/files/output.log
deleted file mode 100644
index e5c0ebecf44b6daff661d6bce1288f2c744ab14f..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/files/output.log
+++ /dev/null
@@ -1,49 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 639, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 588, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
-    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
-                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
-    batch_samples += [next(epoch_iterator)]
-                      ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
-    next_batch, next_batch_info = self._fetch_batches(main_iterator)
-                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
-    batches.append(next(iterator))
-                   ^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
-    data = self._next_data()
-           ^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
-    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
-    data.append(next(self.dataset_iter))
-                ^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
-    for key, example in ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
-    for key, example in iterator:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
-    for key, example in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
-    for x in self.ex_iterable:
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
-    yield from self._iter()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
-    processed_inputs = self.function(*function_args, **self.fn_kwargs)
-                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 477, in prepare_dataset
-    audio_array = sample["array"]
-                  ~~~~~~^^^^^^^^^
-KeyError: 'array'
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt b/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json
deleted file mode 100644
index 91a6670d98692edca3c7c2302ed41497a23b0b97..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T14:58:52.625032Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "315206733824"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json
deleted file mode 100644
index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log
deleted file mode 100644
index 5bc552cfe42bb92f4ee190b01d46a0bd5b93b82d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log
+++ /dev/null
@@ -1,13 +0,0 @@
-{"time":"2025-02-12T14:58:52.442304412Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpkux429nf/port-237900.txt","pid":237900,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T14:58:52.44698044Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":237900}
-{"time":"2025-02-12T14:58:52.446971151Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":42317,"Zone":""}}
-{"time":"2025-02-12T14:58:52.618246003Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:52.62556589Z","level":"INFO","msg":"handleInformInit: received","streamId":"0gfsy6hh","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:52.729897747Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"0gfsy6hh","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:54.959605329Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:54.959661068Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:54.959726878Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:54.959733858Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T14:58:55.879911345Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:55.879969174Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:39202"}
-{"time":"2025-02-12T14:58:55.879979154Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log
deleted file mode 100644
index c315ae03c741865692c58d12afec5388b478f0a7..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T14:58:52.62568889Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log"}
-{"time":"2025-02-12T14:58:52.729819547Z","level":"INFO","msg":"created new stream","id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:52.729885047Z","level":"INFO","msg":"stream: started","id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:52.729955257Z","level":"INFO","msg":"sender: started","stream_id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:52.729941107Z","level":"INFO","msg":"writer: Do: started","stream_id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:52.730185226Z","level":"INFO","msg":"handler: started","stream_id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:52.998836495Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T14:58:54.959699198Z","level":"INFO","msg":"stream: closing","id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:54.959735308Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T14:58:54.960467105Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T14:58:55.63479944Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T14:58:55.878933079Z","level":"INFO","msg":"handler: closed","stream_id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:55.878978098Z","level":"INFO","msg":"sender: closed","stream_id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:55.878984668Z","level":"INFO","msg":"writer: Close: closed","stream_id":"0gfsy6hh"}
-{"time":"2025-02-12T14:58:55.879264427Z","level":"INFO","msg":"stream: closed","id":"0gfsy6hh"}
diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log
deleted file mode 100644
index b6929aba4219df8569a99e62bdfd8415b77c4a11..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Configure stats pid to 237900
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:init():756] calling init triggers
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:init():789] starting backend
-2025-02-12 14:58:52,618 INFO    MainThread:237900 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 14:58:52,624 INFO    MainThread:237900 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 14:58:52,624 INFO    MainThread:237900 [wandb_init.py:init():808] backend started and connected
-2025-02-12 14:58:52,627 INFO    MainThread:237900 [wandb_init.py:init():901] updated telemetry
-2025-02-12 14:58:52,634 INFO    MainThread:237900 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 14:58:52,995 INFO    MainThread:237900 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 14:58:53,108 INFO    MainThread:237900 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 14:58:53,110 INFO    MainThread:237900 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-58-28_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 14:58:53,112 INFO    MainThread:237900 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eaf4e0976e0>>
-2025-02-12 14:58:53,112 INFO    MainThread:237900 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 14:58:54,959 WARNING MsgRouterThr:237900 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb b/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb
deleted file mode 100644
index 38232a1d6938c32ba68a2c2ebc20cb3a1500e7dc..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb and /dev/null differ
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/config.yaml b/wandb/run-20250212_152506-cp47eoxt/files/config.yaml
deleted file mode 100644
index 8ad19f35aa9b0b821482834434adf1e4e566842e..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/files/config.yaml
+++ /dev/null
@@ -1,536 +0,0 @@
-_attn_implementation_autoset:
-    value: true
-_name_or_path:
-    value: openai/whisper-small
-_wandb:
-    value:
-        cli_version: 0.19.6
-        m:
-            - "1": train/global_step
-              "6":
-                - 3
-              "7": []
-            - "1": train/loss
-              "5": 1
-              "6":
-                - 1
-                - 3
-              "7": []
-            - "1": train/grad_norm
-              "5": 1
-              "6":
-                - 1
-                - 3
-              "7": []
-            - "1": train/learning_rate
-              "5": 1
-              "6":
-                - 1
-                - 3
-              "7": []
-            - "1": train/epoch
-              "5": 1
-              "6":
-                - 1
-                - 3
-              "7": []
-        python_version: 3.12.3
-        t:
-            "1":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "2":
-                - 1
-                - 5
-                - 11
-                - 49
-                - 51
-                - 53
-                - 55
-                - 71
-                - 100
-            "3":
-                - 7
-                - 13
-                - 19
-                - 23
-                - 55
-                - 66
-            "4": 3.12.3
-            "5": 0.19.6
-            "6": 4.49.0.dev0
-            "8":
-                - 5
-            "9":
-                "1": transformers_trainer
-            "12": 0.19.6
-            "13": linux-x86_64
-accelerator_config:
-    value:
-        dispatch_batches: null
-        even_batches: true
-        gradient_accumulation_kwargs: null
-        non_blocking: false
-        split_batches: false
-        use_seedable_sampler: true
-activation_dropout:
-    value: 0
-activation_function:
-    value: gelu
-adafactor:
-    value: false
-adam_beta1:
-    value: 0.9
-adam_beta2:
-    value: 0.999
-adam_epsilon:
-    value: 1e-08
-add_cross_attention:
-    value: false
-apply_spec_augment:
-    value: false
-architectures:
-    value:
-        - WhisperForConditionalGeneration
-attention_dropout:
-    value: 0
-auto_find_batch_size:
-    value: false
-average_tokens_across_devices:
-    value: false
-bad_words_ids:
-    value: null
-batch_eval_metrics:
-    value: false
-begin_suppress_tokens:
-    value:
-        - 220
-        - 50257
-bf16:
-    value: false
-bf16_full_eval:
-    value: false
-bos_token_id:
-    value: 50257
-chunk_size_feed_forward:
-    value: 0
-classifier_proj_size:
-    value: 256
-cross_attention_hidden_size:
-    value: null
-d_model:
-    value: 768
-data_seed:
-    value: null
-dataloader_drop_last:
-    value: false
-dataloader_num_workers:
-    value: 0
-dataloader_persistent_workers:
-    value: false
-dataloader_pin_memory:
-    value: true
-dataloader_prefetch_factor:
-    value: null
-ddp_backend:
-    value: null
-ddp_broadcast_buffers:
-    value: null
-ddp_bucket_cap_mb:
-    value: null
-ddp_find_unused_parameters:
-    value: null
-ddp_timeout:
-    value: 1800
-debug:
-    value: []
-decoder_attention_heads:
-    value: 12
-decoder_ffn_dim:
-    value: 3072
-decoder_layerdrop:
-    value: 0
-decoder_layers:
-    value: 12
-decoder_start_token_id:
-    value: 50258
-deepspeed:
-    value: null
-disable_tqdm:
-    value: false
-dispatch_batches:
-    value: null
-diversity_penalty:
-    value: 0
-do_eval:
-    value: true
-do_predict:
-    value: false
-do_sample:
-    value: false
-do_train:
-    value: true
-dropout:
-    value: 0
-early_stopping:
-    value: false
-encoder_attention_heads:
-    value: 12
-encoder_ffn_dim:
-    value: 3072
-encoder_layerdrop:
-    value: 0
-encoder_layers:
-    value: 12
-encoder_no_repeat_ngram_size:
-    value: 0
-eos_token_id:
-    value: 50257
-eval_accumulation_steps:
-    value: null
-eval_delay:
-    value: 0
-eval_do_concat_batches:
-    value: true
-eval_on_start:
-    value: false
-eval_steps:
-    value: 1000
-eval_strategy:
-    value: steps
-eval_use_gather_object:
-    value: false
-evaluation_strategy:
-    value: steps
-exponential_decay_length_penalty:
-    value: null
-finetuning_task:
-    value: null
-forced_bos_token_id:
-    value: null
-forced_decoder_ids:
-    value: null
-forced_eos_token_id:
-    value: null
-fp16:
-    value: true
-fp16_backend:
-    value: auto
-fp16_full_eval:
-    value: false
-fp16_opt_level:
-    value: O1
-fsdp:
-    value: []
-fsdp_config:
-    value:
-        min_num_params: 0
-        xla: false
-        xla_fsdp_grad_ckpt: false
-        xla_fsdp_v2: false
-fsdp_min_num_params:
-    value: 0
-fsdp_transformer_layer_cls_to_wrap:
-    value: null
-full_determinism:
-    value: false
-generation_config:
-    value: null
-generation_max_length:
-    value: 225
-generation_num_beams:
-    value: null
-gradient_accumulation_steps:
-    value: 1
-gradient_checkpointing:
-    value: true
-gradient_checkpointing_kwargs:
-    value: null
-greater_is_better:
-    value: false
-group_by_length:
-    value: false
-half_precision_backend:
-    value: auto
-hub_always_push:
-    value: false
-hub_model_id:
-    value: null
-hub_private_repo:
-    value: null
-hub_strategy:
-    value: every_save
-hub_token:
-    value: <HUB_TOKEN>
-id2label:
-    value:
-        "0": LABEL_0
-        "1": LABEL_1
-ignore_data_skip:
-    value: false
-include_for_metrics:
-    value: []
-include_inputs_for_metrics:
-    value: false
-include_num_input_tokens_seen:
-    value: false
-include_tokens_per_second:
-    value: false
-init_std:
-    value: 0.02
-is_decoder:
-    value: false
-is_encoder_decoder:
-    value: true
-jit_mode_eval:
-    value: false
-label_names:
-    value: null
-label_smoothing_factor:
-    value: 0
-label2id:
-    value:
-        LABEL_0: 0
-        LABEL_1: 1
-learning_rate:
-    value: 1e-05
-length_column_name:
-    value: input_length
-length_penalty:
-    value: 1
-load_best_model_at_end:
-    value: true
-local_rank:
-    value: 0
-log_level:
-    value: passive
-log_level_replica:
-    value: warning
-log_on_each_node:
-    value: true
-logging_dir:
-    value: ./runs/Feb12_15-24-15_tknika
-logging_first_step:
-    value: false
-logging_nan_inf_filter:
-    value: true
-logging_steps:
-    value: 25
-logging_strategy:
-    value: steps
-lr_scheduler_type:
-    value: linear
-mask_feature_length:
-    value: 10
-mask_feature_min_masks:
-    value: 0
-mask_feature_prob:
-    value: 0
-mask_time_length:
-    value: 10
-mask_time_min_masks:
-    value: 2
-mask_time_prob:
-    value: 0.05
-max_grad_norm:
-    value: 1
-max_length:
-    value: 448
-max_source_positions:
-    value: 1500
-max_steps:
-    value: 8000
-max_target_positions:
-    value: 448
-median_filter_width:
-    value: 7
-metric_for_best_model:
-    value: wer
-min_length:
-    value: 0
-model/num_parameters:
-    value: 241734912
-model_type:
-    value: whisper
-mp_parameters:
-    value: ""
-neftune_noise_alpha:
-    value: null
-no_cuda:
-    value: false
-no_repeat_ngram_size:
-    value: 0
-num_beam_groups:
-    value: 1
-num_beams:
-    value: 1
-num_hidden_layers:
-    value: 12
-num_mel_bins:
-    value: 80
-num_return_sequences:
-    value: 1
-num_train_epochs:
-    value: 3
-optim:
-    value: adamw_torch
-optim_args:
-    value: null
-optim_target_modules:
-    value: null
-output_attentions:
-    value: false
-output_dir:
-    value: ./
-output_hidden_states:
-    value: false
-output_scores:
-    value: false
-overwrite_output_dir:
-    value: true
-pad_token_id:
-    value: 50257
-past_index:
-    value: -1
-per_device_eval_batch_size:
-    value: 16
-per_device_train_batch_size:
-    value: 32
-per_gpu_eval_batch_size:
-    value: null
-per_gpu_train_batch_size:
-    value: null
-predict_with_generate:
-    value: true
-prediction_loss_only:
-    value: false
-prefix:
-    value: null
-problem_type:
-    value: null
-push_to_hub:
-    value: true
-push_to_hub_model_id:
-    value: null
-push_to_hub_organization:
-    value: null
-push_to_hub_token:
-    value: <PUSH_TO_HUB_TOKEN>
-ray_scope:
-    value: last
-remove_invalid_values:
-    value: false
-remove_unused_columns:
-    value: true
-repetition_penalty:
-    value: 1
-report_to:
-    value:
-        - wandb
-restore_callback_states_from_checkpoint:
-    value: false
-resume_from_checkpoint:
-    value: null
-return_dict:
-    value: true
-return_dict_in_generate:
-    value: false
-run_name:
-    value: whisper-small-eu
-save_on_each_node:
-    value: false
-save_only_model:
-    value: false
-save_safetensors:
-    value: true
-save_steps:
-    value: 1000
-save_strategy:
-    value: steps
-save_total_limit:
-    value: null
-scale_embedding:
-    value: false
-seed:
-    value: 42
-sep_token_id:
-    value: null
-skip_memory_metrics:
-    value: true
-sortish_sampler:
-    value: false
-split_batches:
-    value: null
-suppress_tokens:
-    value: null
-task_specific_params:
-    value: null
-temperature:
-    value: 1
-tf_legacy_loss:
-    value: false
-tf32:
-    value: null
-tie_encoder_decoder:
-    value: false
-tie_word_embeddings:
-    value: true
-tokenizer_class:
-    value: null
-top_k:
-    value: 50
-top_p:
-    value: 1
-torch_compile:
-    value: false
-torch_compile_backend:
-    value: null
-torch_compile_mode:
-    value: null
-torch_dtype:
-    value: float32
-torch_empty_cache_steps:
-    value: null
-torchdynamo:
-    value: null
-torchscript:
-    value: false
-tpu_metrics_debug:
-    value: false
-tpu_num_cores:
-    value: null
-transformers_version:
-    value: 4.49.0.dev0
-typical_p:
-    value: 1
-use_bfloat16:
-    value: false
-use_cache:
-    value: false
-use_cpu:
-    value: false
-use_ipex:
-    value: false
-use_legacy_prediction_loop:
-    value: false
-use_liger_kernel:
-    value: false
-use_mps_device:
-    value: false
-use_weighted_layer_sum:
-    value: false
-vocab_size:
-    value: 51865
-warmup_ratio:
-    value: 0
-warmup_steps:
-    value: 500
-weight_decay:
-    value: 0
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/output.log b/wandb/run-20250212_152506-cp47eoxt/files/output.log
deleted file mode 100644
index d5181e7d9ff6e24c3fb944c5af3fc84ec3922a2c..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/files/output.log
+++ /dev/null
@@ -1,28 +0,0 @@
-  0%|                                                                                       | 0/8000 [00:00<?, ?it/s]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
-[INFO|trainer_utils.py:837] 2025-02-12 15:25:12,786 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-  0%|▏                                                                           | 25/8000 [00:33<2:23:47,  1.08s/it]Traceback (most recent call last):
-{'loss': 2.3284, 'grad_norm': 17.581905364990234, 'learning_rate': 4.4e-07, 'epoch': 0.0}
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 719, in <module>
-    main()
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 668, in main
-    train_result = trainer.train(resume_from_checkpoint=checkpoint)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
-    return inner_training_loop(
-           ^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2512, in _inner_training_loop
-    tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 3662, in training_step
-    self.accelerator.backward(loss, **kwargs)
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/accelerator.py", line 2242, in backward
-    self.scaler.scale(loss).backward(**kwargs)
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/_tensor.py", line 626, in backward
-    torch.autograd.backward(
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/autograd/__init__.py", line 347, in backward
-    _engine_run_backward(
-  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/autograd/graph.py", line 823, in _engine_run_backward
-    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-KeyboardInterrupt
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt b/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json b/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json
deleted file mode 100644
index 741ca0342dc8378ca92566276e75c09582efae0d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json
+++ /dev/null
@@ -1,86 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T15:25:06.501811Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--audio_column_name=audio",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "315485667328"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json b/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json
deleted file mode 100644
index 80f564210f160830a713856b51172daf9a4d37cf..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_step":0,"train/grad_norm":17.581905364990234,"train/learning_rate":4.4e-07,"train/epoch":0.003125,"train/global_step":25,"_timestamp":1.7393739409734626e+09,"_wandb":{"runtime":35},"_runtime":34.472001053,"train/loss":2.3284}
\ No newline at end of file
diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log
deleted file mode 100644
index 988f517348f024820b907a90b8994b2652345846..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log
+++ /dev/null
@@ -1,13 +0,0 @@
-{"time":"2025-02-12T15:25:06.32070089Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_o_4dslg/port-242535.txt","pid":242535,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T15:25:06.325654679Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":242535}
-{"time":"2025-02-12T15:25:06.325624039Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34421,"Zone":""}}
-{"time":"2025-02-12T15:25:06.495145129Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:06.504420484Z","level":"INFO","msg":"handleInformInit: received","streamId":"cp47eoxt","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:06.610655359Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"cp47eoxt","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:41.518158713Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:41.518235362Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:41.518248342Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-02-12T15:25:41.518365211Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:42.440985993Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:42.441028483Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35824"}
-{"time":"2025-02-12T15:25:42.441053643Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log
deleted file mode 100644
index ddd19b9298efd912885b30af4c20522b4d39c052..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log
+++ /dev/null
@@ -1,15 +0,0 @@
-{"time":"2025-02-12T15:25:06.504719321Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log"}
-{"time":"2025-02-12T15:25:06.61058157Z","level":"INFO","msg":"created new stream","id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:06.61064572Z","level":"INFO","msg":"stream: started","id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:06.610715339Z","level":"INFO","msg":"writer: Do: started","stream_id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:06.610734969Z","level":"INFO","msg":"handler: started","stream_id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:06.610881007Z","level":"INFO","msg":"sender: started","stream_id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:06.883150548Z","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-02-12T15:25:41.518262532Z","level":"INFO","msg":"stream: closing","id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:41.518304801Z","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-02-12T15:25:41.519096684Z","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-02-12T15:25:42.16914698Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-02-12T15:25:42.440671227Z","level":"INFO","msg":"handler: closed","stream_id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:42.440734176Z","level":"INFO","msg":"writer: Close: closed","stream_id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:42.440750356Z","level":"INFO","msg":"sender: closed","stream_id":"cp47eoxt"}
-{"time":"2025-02-12T15:25:42.440859685Z","level":"INFO","msg":"stream: closed","id":"cp47eoxt"}
diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug.log
deleted file mode 100644
index 5a044527c50b212ff0a9c5959254662b8900d25c..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152506-cp47eoxt/logs/debug.log
+++ /dev/null
@@ -1,26 +0,0 @@
-2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Configure stats pid to 242535
-2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug.log
-2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log
-2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:init():756] calling init triggers
-2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:init():789] starting backend
-2025-02-12 15:25:06,495 INFO    MainThread:242535 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 15:25:06,501 INFO    MainThread:242535 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 15:25:06,501 INFO    MainThread:242535 [wandb_init.py:init():808] backend started and connected
-2025-02-12 15:25:06,503 INFO    MainThread:242535 [wandb_init.py:init():901] updated telemetry
-2025-02-12 15:25:06,511 INFO    MainThread:242535 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 15:25:06,880 INFO    MainThread:242535 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 15:25:06,990 INFO    MainThread:242535 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 15:25:06,991 INFO    MainThread:242535 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-24-15_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 15:25:06,993 INFO    MainThread:242535 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x730850e96e70>>
-2025-02-12 15:25:06,993 INFO    MainThread:242535 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
-2025-02-12 15:25:41,518 WARNING MsgRouterThr:242535 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb b/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb
deleted file mode 100644
index cbcfbc2adf86bb17c7862166061d04cf4fb5218e..0000000000000000000000000000000000000000
Binary files a/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb and /dev/null differ
diff --git a/wandb/run-20250212_152709-lejyafmi/files/output.log b/wandb/run-20250212_152709-lejyafmi/files/output.log
deleted file mode 100644
index 7cd9b7d4875569e33ea86d709a8eee55be82c025..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152709-lejyafmi/files/output.log
+++ /dev/null
@@ -1,1782 +0,0 @@
-  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
-[INFO|trainer_utils.py:837] 2025-02-12 15:27:14,718 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-                                                                                                                                                                             
-{'loss': 2.3284, 'grad_norm': 17.579944610595703, 'learning_rate': 4.4e-07, 'epoch': 0.0}
-{'loss': 1.9145, 'grad_norm': 9.753120422363281, 'learning_rate': 9.400000000000001e-07, 'epoch': 0.01}
-{'loss': 1.2892, 'grad_norm': 9.469987869262695, 'learning_rate': 1.44e-06, 'epoch': 0.01}
-{'loss': 0.9797, 'grad_norm': 6.952774524688721, 'learning_rate': 1.94e-06, 'epoch': 0.01}
-{'loss': 0.8265, 'grad_norm': 6.080902576446533, 'learning_rate': 2.4400000000000004e-06, 'epoch': 0.02}
-{'loss': 0.6998, 'grad_norm': 5.6766037940979, 'learning_rate': 2.9400000000000002e-06, 'epoch': 0.02}
-{'loss': 0.6537, 'grad_norm': 5.372249126434326, 'learning_rate': 3.44e-06, 'epoch': 0.02}
-{'loss': 0.6149, 'grad_norm': 5.710323810577393, 'learning_rate': 3.94e-06, 'epoch': 0.03}
-{'loss': 0.5256, 'grad_norm': 5.235953330993652, 'learning_rate': 4.440000000000001e-06, 'epoch': 0.03}
-{'loss': 0.54, 'grad_norm': 6.58635950088501, 'learning_rate': 4.94e-06, 'epoch': 0.03}
-{'loss': 0.5521, 'grad_norm': 5.4912004470825195, 'learning_rate': 5.4400000000000004e-06, 'epoch': 0.03}
-{'loss': 0.5379, 'grad_norm': 5.846869945526123, 'learning_rate': 5.94e-06, 'epoch': 0.04}
-{'loss': 0.4778, 'grad_norm': 5.060309410095215, 'learning_rate': 6.440000000000001e-06, 'epoch': 0.04}
-{'loss': 0.4152, 'grad_norm': 5.06487512588501, 'learning_rate': 6.9400000000000005e-06, 'epoch': 0.04}
-{'loss': 0.3547, 'grad_norm': 4.936045169830322, 'learning_rate': 7.440000000000001e-06, 'epoch': 0.05}
-{'loss': 0.3428, 'grad_norm': 3.8072471618652344, 'learning_rate': 7.94e-06, 'epoch': 0.05}
-{'loss': 0.3099, 'grad_norm': 3.9378795623779297, 'learning_rate': 8.44e-06, 'epoch': 0.05}
-{'loss': 0.2963, 'grad_norm': 3.732869863510132, 'learning_rate': 8.94e-06, 'epoch': 0.06}
-{'loss': 0.2745, 'grad_norm': 3.9596025943756104, 'learning_rate': 9.440000000000001e-06, 'epoch': 0.06}
-{'loss': 0.2626, 'grad_norm': 3.428398370742798, 'learning_rate': 9.940000000000001e-06, 'epoch': 0.06}
-{'loss': 0.2411, 'grad_norm': 5.03747034072876, 'learning_rate': 9.970666666666668e-06, 'epoch': 0.07}
-{'loss': 0.2389, 'grad_norm': 3.2012217044830322, 'learning_rate': 9.937333333333334e-06, 'epoch': 0.07}
-{'loss': 0.2217, 'grad_norm': 3.7361278533935547, 'learning_rate': 9.904e-06, 'epoch': 0.07}
-{'loss': 0.2246, 'grad_norm': 4.509885787963867, 'learning_rate': 9.870666666666667e-06, 'epoch': 0.07}
-{'loss': 0.199, 'grad_norm': 3.462961435317993, 'learning_rate': 9.837333333333335e-06, 'epoch': 0.08}
-{'loss': 0.2156, 'grad_norm': 2.764691114425659, 'learning_rate': 9.804000000000001e-06, 'epoch': 0.08}
-{'loss': 0.212, 'grad_norm': 3.059408187866211, 'learning_rate': 9.770666666666668e-06, 'epoch': 0.08}
-{'loss': 0.2123, 'grad_norm': 3.952425718307495, 'learning_rate': 9.737333333333334e-06, 'epoch': 0.09}
-{'loss': 0.2343, 'grad_norm': 4.892609119415283, 'learning_rate': 9.704e-06, 'epoch': 0.09}
-{'loss': 0.3308, 'grad_norm': 4.592615127563477, 'learning_rate': 9.670666666666667e-06, 'epoch': 0.09}
-{'loss': 0.3146, 'grad_norm': 4.663967132568359, 'learning_rate': 9.637333333333333e-06, 'epoch': 0.1}
-{'loss': 0.3519, 'grad_norm': 5.091048717498779, 'learning_rate': 9.604000000000002e-06, 'epoch': 0.1}
-{'loss': 0.2365, 'grad_norm': 3.8216071128845215, 'learning_rate': 9.570666666666666e-06, 'epoch': 0.1}
-{'loss': 0.193, 'grad_norm': 3.122516393661499, 'learning_rate': 9.537333333333334e-06, 'epoch': 0.11}
-{'loss': 0.1759, 'grad_norm': 2.657339096069336, 'learning_rate': 9.504e-06, 'epoch': 0.11}
-{'loss': 0.2387, 'grad_norm': 4.554510116577148, 'learning_rate': 9.470666666666667e-06, 'epoch': 0.11}
-{'loss': 0.2845, 'grad_norm': 5.045220851898193, 'learning_rate': 9.437333333333334e-06, 'epoch': 0.12}
-{'loss': 0.2755, 'grad_norm': 4.260054588317871, 'learning_rate': 9.404e-06, 'epoch': 0.12}
-{'loss': 0.481, 'grad_norm': 5.8209147453308105, 'learning_rate': 9.370666666666668e-06, 'epoch': 0.12}
-{'loss': 0.3998, 'grad_norm': 5.498444557189941, 'learning_rate': 9.337333333333335e-06, 'epoch': 0.12}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 15:47:16,534 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 15:47:16,534 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 15:47:24,994 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[WARNING|logging.py:329] 2025-02-12 15:47:25,085 >> Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:25,197 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[WARNING|logging.py:329] 2025-02-12 15:47:25,198 >> The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:26,487 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:27,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:28,903 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:29,960 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:31,125 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:32,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:33,355 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:34,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:35,741 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:36,871 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:37,975 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:39,025 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:39,971 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:41,203 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:42,188 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:43,134 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:44,323 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:45,270 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:46,187 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:47,189 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:48,181 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:49,143 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:50,185 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:51,161 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:52,183 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:53,261 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:54,446 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:55,377 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:56,390 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:57,470 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:58,505 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:47:59,581 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:00,601 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:01,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:02,774 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:03,761 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:04,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:05,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:06,875 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:07,888 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:08,928 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:09,869 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:10,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:11,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:12,891 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:13,888 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:14,855 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:15,977 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:16,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:19,165 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:20,241 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:21,179 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:22,187 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:23,191 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:24,292 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:25,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:26,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:27,247 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:28,207 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:29,289 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:30,296 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:31,338 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:32,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:33,286 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:34,193 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:35,205 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:36,200 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:37,273 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:38,271 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:39,272 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:40,397 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:41,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:42,511 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:43,561 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:44,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:45,566 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:46,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:47,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:48,586 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:49,564 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:50,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:51,564 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:52,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:53,647 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:54,606 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:55,632 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:56,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:57,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:58,939 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:48:59,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:00,958 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:01,925 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:02,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:04,023 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:05,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:06,086 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:07,100 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:08,098 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:09,138 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:10,195 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:11,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:12,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:13,333 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:14,460 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:15,491 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:16,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:17,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:18,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:19,544 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:20,575 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:21,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:22,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:23,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:24,520 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:25,491 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:26,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:27,608 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:28,604 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:29,597 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:30,632 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:31,579 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:32,624 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:33,643 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:34,623 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:35,589 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:36,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:37,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:38,570 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:39,576 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:40,578 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 15:49:41,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
- 12%|████████████████▎                                                                                                                 | 1000/8000 [22:39<2:10:54,  1.12s/it][INFO|trainer.py:3860] 2025-02-12 15:49:49,799 >> Saving model checkpoint to ./checkpoint-1000
-{'eval_loss': 0.36512792110443115, 'eval_wer': 21.50135552023932, 'eval_runtime': 153.2646, 'eval_samples_per_second': 13.728, 'eval_steps_per_second': 0.861, 'epoch': 0.12}
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:2810: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.
-  warnings.warn(
-[INFO|configuration_utils.py:423] 2025-02-12 15:49:49,801 >> Configuration saved in ./checkpoint-1000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 15:49:49,802 >> Configuration saved in ./checkpoint-1000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 15:49:51,193 >> Model weights saved in ./checkpoint-1000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 15:49:51,195 >> Feature extractor saved in ./checkpoint-1000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 15:49:54,577 >> Feature extractor saved in ./preprocessor_config.json
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
- 13%|████████████████▏                                                                                                                | 1001/8000 [22:48<96:09:09, 49.46s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 15:49:59 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
- 13%|████████████████▍                                                                                                                 | 1015/8000 [23:04<2:58:40,  1.53s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-                                                                                                                                                                             
-{'loss': 0.329, 'grad_norm': 4.732964515686035, 'learning_rate': 9.304000000000001e-06, 'epoch': 0.13}
-{'loss': 0.2319, 'grad_norm': 3.3556125164031982, 'learning_rate': 9.270666666666667e-06, 'epoch': 0.13}
-{'loss': 0.174, 'grad_norm': 2.9708847999572754, 'learning_rate': 9.237333333333334e-06, 'epoch': 0.13}
-{'loss': 0.1447, 'grad_norm': 2.841306447982788, 'learning_rate': 9.204e-06, 'epoch': 0.14}
-{'loss': 0.1406, 'grad_norm': 2.7909176349639893, 'learning_rate': 9.170666666666668e-06, 'epoch': 0.14}
-{'loss': 0.151, 'grad_norm': 3.37842059135437, 'learning_rate': 9.137333333333333e-06, 'epoch': 0.14}
-{'loss': 0.1529, 'grad_norm': 3.023977041244507, 'learning_rate': 9.104000000000001e-06, 'epoch': 0.15}
-{'loss': 0.1496, 'grad_norm': 3.015974283218384, 'learning_rate': 9.070666666666668e-06, 'epoch': 0.15}
-{'loss': 0.219, 'grad_norm': 4.30889892578125, 'learning_rate': 9.037333333333334e-06, 'epoch': 0.15}
-{'loss': 0.238, 'grad_norm': 4.160729885101318, 'learning_rate': 9.004e-06, 'epoch': 0.16}
-{'loss': 0.2603, 'grad_norm': 4.687659740447998, 'learning_rate': 8.970666666666667e-06, 'epoch': 0.16}
-{'loss': 0.2666, 'grad_norm': 4.577232837677002, 'learning_rate': 8.937333333333335e-06, 'epoch': 0.16}
-{'loss': 0.2337, 'grad_norm': 5.091732501983643, 'learning_rate': 8.904e-06, 'epoch': 0.17}
-{'loss': 0.2379, 'grad_norm': 4.125801086425781, 'learning_rate': 8.870666666666668e-06, 'epoch': 0.17}
-{'loss': 0.2215, 'grad_norm': 5.142183303833008, 'learning_rate': 8.837333333333334e-06, 'epoch': 0.17}
-{'loss': 0.2136, 'grad_norm': 4.486277103424072, 'learning_rate': 8.804e-06, 'epoch': 0.17}
-{'loss': 0.2214, 'grad_norm': 3.5466482639312744, 'learning_rate': 8.770666666666667e-06, 'epoch': 0.18}
-{'loss': 0.2113, 'grad_norm': 3.6199097633361816, 'learning_rate': 8.737333333333334e-06, 'epoch': 0.18}
-{'loss': 0.1552, 'grad_norm': 2.559951066970825, 'learning_rate': 8.704e-06, 'epoch': 0.18}
-{'loss': 0.1354, 'grad_norm': 2.9152133464813232, 'learning_rate': 8.670666666666666e-06, 'epoch': 0.19}
-{'loss': 0.144, 'grad_norm': 2.608732223510742, 'learning_rate': 8.637333333333335e-06, 'epoch': 0.19}
-{'loss': 0.1367, 'grad_norm': 4.0043416023254395, 'learning_rate': 8.604000000000001e-06, 'epoch': 0.19}
-{'loss': 0.1194, 'grad_norm': 2.3621206283569336, 'learning_rate': 8.570666666666667e-06, 'epoch': 0.2}
-{'loss': 0.1283, 'grad_norm': 2.6970181465148926, 'learning_rate': 8.537333333333334e-06, 'epoch': 0.2}
-{'loss': 0.1858, 'grad_norm': 4.737370014190674, 'learning_rate': 8.504000000000002e-06, 'epoch': 0.2}
-{'loss': 0.1995, 'grad_norm': 3.462738513946533, 'learning_rate': 8.470666666666667e-06, 'epoch': 0.21}
-{'loss': 0.2028, 'grad_norm': 4.608364582061768, 'learning_rate': 8.437333333333335e-06, 'epoch': 0.21}
-{'loss': 0.1952, 'grad_norm': 2.770601987838745, 'learning_rate': 8.404000000000001e-06, 'epoch': 0.21}
-{'loss': 0.1464, 'grad_norm': 3.041656017303467, 'learning_rate': 8.370666666666668e-06, 'epoch': 0.22}
-{'loss': 0.1424, 'grad_norm': 2.988032102584839, 'learning_rate': 8.337333333333334e-06, 'epoch': 0.22}
-{'loss': 0.1233, 'grad_norm': 3.0646026134490967, 'learning_rate': 8.304e-06, 'epoch': 0.22}
-{'loss': 0.1384, 'grad_norm': 2.617403268814087, 'learning_rate': 8.270666666666667e-06, 'epoch': 0.23}
-{'loss': 0.1208, 'grad_norm': 2.6170425415039062, 'learning_rate': 8.237333333333333e-06, 'epoch': 0.23}
-{'loss': 0.1176, 'grad_norm': 2.1296098232269287, 'learning_rate': 8.204000000000001e-06, 'epoch': 0.23}
-{'loss': 0.1189, 'grad_norm': 2.767275810241699, 'learning_rate': 8.170666666666668e-06, 'epoch': 0.23}
-{'loss': 0.1211, 'grad_norm': 2.7053661346435547, 'learning_rate': 8.137333333333334e-06, 'epoch': 0.24}
-{'loss': 0.1156, 'grad_norm': 2.281399965286255, 'learning_rate': 8.104e-06, 'epoch': 0.24}
-{'loss': 0.1517, 'grad_norm': 3.7013635635375977, 'learning_rate': 8.070666666666667e-06, 'epoch': 0.24}
-{'loss': 0.2002, 'grad_norm': 3.7125532627105713, 'learning_rate': 8.037333333333334e-06, 'epoch': 0.25}
-{'loss': 0.1975, 'grad_norm': 3.8716859817504883, 'learning_rate': 8.004e-06, 'epoch': 0.25}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 16:09:35,224 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 16:09:35,311 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 16:09:44,012 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:44,213 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:45,353 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:46,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:48,009 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:49,122 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:50,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:51,439 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:52,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:53,705 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:54,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:55,943 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:56,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:57,978 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:58,906 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:09:59,962 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:00,881 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:01,789 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:02,826 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:03,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:04,701 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:05,692 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:06,679 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:07,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:08,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:09,613 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:10,609 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:11,670 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:12,812 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:13,765 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:14,780 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:15,841 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:16,909 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:17,972 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:18,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:20,066 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:21,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:22,101 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:23,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:24,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:26,400 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:27,375 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:28,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:29,342 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:30,332 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:31,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:32,379 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:33,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:34,359 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:35,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:36,544 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:37,617 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:38,677 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:39,605 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:40,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:41,648 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:42,760 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:43,678 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:44,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:45,707 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:46,693 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:47,755 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:48,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:49,767 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:50,800 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:51,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:52,626 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:53,649 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:54,655 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:55,662 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:56,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:57,658 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:58,766 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:10:59,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:00,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:01,876 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:02,925 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:03,865 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:04,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:05,883 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:06,855 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:07,827 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:08,786 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:09,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:10,854 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:11,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:12,879 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:13,904 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:15,016 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:16,066 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:17,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:18,230 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:19,257 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:20,246 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:21,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:22,318 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:23,359 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:24,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:25,394 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:26,375 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:27,419 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:28,474 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:29,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:30,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:31,620 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:32,753 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:33,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:34,847 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:35,944 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:36,936 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:37,905 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:38,956 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:40,009 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:41,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:41,938 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:42,935 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:43,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:44,962 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:46,048 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:47,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:48,088 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:49,116 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:50,045 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:51,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:52,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:53,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:54,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:55,060 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:56,087 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:57,119 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:58,152 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:11:59,142 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:12:00,092 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
- 25%|████████████████████████████████▌                                                                                                 | 2000/8000 [44:57<1:52:37,  1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:12:08,401 >> Saving model checkpoint to ./checkpoint-2000
-{'eval_loss': 0.2918355464935303, 'eval_wer': 15.873609423202767, 'eval_runtime': 153.1763, 'eval_samples_per_second': 13.736, 'eval_steps_per_second': 0.862, 'epoch': 0.25}
-[INFO|configuration_utils.py:423] 2025-02-12 16:12:08,403 >> Configuration saved in ./checkpoint-2000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 16:12:08,403 >> Configuration saved in ./checkpoint-2000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 16:12:09,828 >> Model weights saved in ./checkpoint-2000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 16:12:09,829 >> Feature extractor saved in ./checkpoint-2000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 16:12:13,102 >> Feature extractor saved in ./preprocessor_config.json
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
- 25%|████████████████████████████████▎                                                                                                | 2001/8000 [45:05<81:49:55, 49.11s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 16:12:17 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
- 25%|████████████████████████████████▋                                                                                                 | 2013/8000 [45:24<3:17:54,  1.98s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-                                                                                                                                                                             
-{'loss': 0.1648, 'grad_norm': 2.4911813735961914, 'learning_rate': 7.970666666666668e-06, 'epoch': 0.25}
-{'loss': 0.1162, 'grad_norm': 2.604146718978882, 'learning_rate': 7.937333333333333e-06, 'epoch': 0.26}
-{'loss': 0.1135, 'grad_norm': 2.7352280616760254, 'learning_rate': 7.904000000000001e-06, 'epoch': 0.26}
-{'loss': 0.1153, 'grad_norm': 2.2932169437408447, 'learning_rate': 7.870666666666667e-06, 'epoch': 0.26}
-{'loss': 0.1005, 'grad_norm': 3.1734797954559326, 'learning_rate': 7.837333333333334e-06, 'epoch': 0.27}
-{'loss': 0.0988, 'grad_norm': 2.4353103637695312, 'learning_rate': 7.804e-06, 'epoch': 0.27}
-{'loss': 0.1028, 'grad_norm': 2.8655478954315186, 'learning_rate': 7.770666666666668e-06, 'epoch': 0.27}
-{'loss': 0.1751, 'grad_norm': 3.800967216491699, 'learning_rate': 7.737333333333335e-06, 'epoch': 0.28}
-{'loss': 0.1798, 'grad_norm': 4.212419509887695, 'learning_rate': 7.704000000000001e-06, 'epoch': 0.28}
-{'loss': 0.199, 'grad_norm': 3.5863020420074463, 'learning_rate': 7.670666666666668e-06, 'epoch': 0.28}
-{'loss': 0.1335, 'grad_norm': 3.1013996601104736, 'learning_rate': 7.637333333333334e-06, 'epoch': 0.28}
-{'loss': 0.0976, 'grad_norm': 2.2462713718414307, 'learning_rate': 7.604e-06, 'epoch': 0.29}
-{'loss': 0.0946, 'grad_norm': 2.9669203758239746, 'learning_rate': 7.570666666666668e-06, 'epoch': 0.29}
-{'loss': 0.0935, 'grad_norm': 2.645289897918701, 'learning_rate': 7.537333333333334e-06, 'epoch': 0.29}
-{'loss': 0.1045, 'grad_norm': 1.9715274572372437, 'learning_rate': 7.5040000000000005e-06, 'epoch': 0.3}
-{'loss': 0.0977, 'grad_norm': 2.1423373222351074, 'learning_rate': 7.470666666666667e-06, 'epoch': 0.3}
-{'loss': 0.1061, 'grad_norm': 2.029958963394165, 'learning_rate': 7.437333333333334e-06, 'epoch': 0.3}
-{'loss': 0.0998, 'grad_norm': 1.972732663154602, 'learning_rate': 7.404e-06, 'epoch': 0.31}
-{'loss': 0.1068, 'grad_norm': 2.2875239849090576, 'learning_rate': 7.370666666666667e-06, 'epoch': 0.31}
-{'loss': 0.1168, 'grad_norm': 3.1778981685638428, 'learning_rate': 7.337333333333334e-06, 'epoch': 0.31}
-{'loss': 0.1524, 'grad_norm': 3.360576868057251, 'learning_rate': 7.304000000000001e-06, 'epoch': 0.32}
-{'loss': 0.1483, 'grad_norm': 3.5467047691345215, 'learning_rate': 7.270666666666667e-06, 'epoch': 0.32}
-{'loss': 0.1775, 'grad_norm': 3.488696575164795, 'learning_rate': 7.237333333333334e-06, 'epoch': 0.32}
-{'loss': 0.135, 'grad_norm': 2.8800296783447266, 'learning_rate': 7.204000000000001e-06, 'epoch': 0.33}
-{'loss': 0.1108, 'grad_norm': 3.1020660400390625, 'learning_rate': 7.170666666666667e-06, 'epoch': 0.33}
-{'loss': 0.1002, 'grad_norm': 2.1233720779418945, 'learning_rate': 7.137333333333334e-06, 'epoch': 0.33}
-{'loss': 0.0941, 'grad_norm': 2.393425703048706, 'learning_rate': 7.104000000000001e-06, 'epoch': 0.33}
-{'loss': 0.0959, 'grad_norm': 2.295924186706543, 'learning_rate': 7.0706666666666665e-06, 'epoch': 0.34}
-{'loss': 0.1116, 'grad_norm': 1.8125039339065552, 'learning_rate': 7.037333333333334e-06, 'epoch': 0.34}
-{'loss': 0.1146, 'grad_norm': 3.006834030151367, 'learning_rate': 7.004000000000001e-06, 'epoch': 0.34}
-{'loss': 0.2029, 'grad_norm': 4.171006679534912, 'learning_rate': 6.970666666666667e-06, 'epoch': 0.35}
-{'loss': 0.1913, 'grad_norm': 3.68646240234375, 'learning_rate': 6.937333333333334e-06, 'epoch': 0.35}
-{'loss': 0.16, 'grad_norm': 3.7463300228118896, 'learning_rate': 6.904e-06, 'epoch': 0.35}
-{'loss': 0.1571, 'grad_norm': 3.069136381149292, 'learning_rate': 6.8706666666666676e-06, 'epoch': 0.36}
-{'loss': 0.1608, 'grad_norm': 3.17172908782959, 'learning_rate': 6.837333333333334e-06, 'epoch': 0.36}
-{'loss': 0.1546, 'grad_norm': 3.1673102378845215, 'learning_rate': 6.804e-06, 'epoch': 0.36}
-{'loss': 0.1282, 'grad_norm': 2.344193935394287, 'learning_rate': 6.770666666666668e-06, 'epoch': 0.37}
-{'loss': 0.0979, 'grad_norm': 2.5321226119995117, 'learning_rate': 6.737333333333333e-06, 'epoch': 0.37}
-{'loss': 0.1049, 'grad_norm': 2.2652363777160645, 'learning_rate': 6.7040000000000005e-06, 'epoch': 0.37}
-{'loss': 0.1433, 'grad_norm': 2.7856993675231934, 'learning_rate': 6.670666666666668e-06, 'epoch': 0.38}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 16:32:16,805 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 16:32:16,805 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 16:32:24,994 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:25,165 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:26,282 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:27,776 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:29,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:30,273 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:31,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:32,739 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:33,771 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:34,884 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:35,931 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:36,990 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:38,037 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:39,057 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:39,996 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:41,081 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:42,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:42,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:43,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:44,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:45,808 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:46,811 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:47,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:48,768 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:49,750 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:50,750 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:51,749 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:52,812 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:53,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:54,901 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:55,908 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:56,978 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:58,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:32:59,089 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:00,086 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:01,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:02,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:03,217 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:04,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:05,324 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:07,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:08,515 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:09,566 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:10,508 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:11,497 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:12,472 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:13,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:14,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:15,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:16,645 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:17,663 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:18,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:19,731 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:20,661 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:21,675 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:22,678 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:23,752 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:24,673 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:25,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:26,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:27,622 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:28,693 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:29,685 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:30,756 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:31,779 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:32,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:33,577 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:34,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:35,631 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:36,667 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:37,676 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:38,674 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:39,764 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:40,785 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:41,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:42,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:43,891 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:44,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:45,810 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:46,829 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:47,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:48,759 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:49,713 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:50,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:51,773 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:52,802 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:53,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:54,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:55,881 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:56,931 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:58,078 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:33:59,112 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:00,159 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:01,125 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:02,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:03,167 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:04,200 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:05,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:06,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:07,179 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:08,207 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:09,226 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:10,272 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:11,252 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:12,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:13,418 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:14,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:15,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:16,531 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:17,519 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:18,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:19,486 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:20,519 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:21,470 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:22,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:23,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:24,377 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:25,447 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:26,486 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:27,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:28,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:29,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:30,428 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:31,462 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:32,460 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:33,428 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:34,392 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:35,367 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:36,383 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:37,360 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:38,347 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:39,325 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:34:40,266 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
- 38%|████████████████████████████████████████████████                                                                                | 3000/8000 [1:07:37<1:34:23,  1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:34:48,564 >> Saving model checkpoint to ./checkpoint-3000
-{'eval_loss': 0.2720916271209717, 'eval_wer': 13.9010937646069, 'eval_runtime': 151.7576, 'eval_samples_per_second': 13.864, 'eval_steps_per_second': 0.87, 'epoch': 0.38}
-[INFO|configuration_utils.py:423] 2025-02-12 16:34:48,565 >> Configuration saved in ./checkpoint-3000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 16:34:48,566 >> Configuration saved in ./checkpoint-3000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 16:34:49,987 >> Model weights saved in ./checkpoint-3000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 16:34:49,988 >> Feature extractor saved in ./checkpoint-3000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 16:34:53,620 >> Feature extractor saved in ./preprocessor_config.json
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
- 38%|███████████████████████████████████████████████▋                                                                               | 3001/8000 [1:07:45<67:42:47, 48.76s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 16:34:57 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
- 38%|████████████████████████████████████████████████▏                                                                               | 3009/8000 [1:07:56<5:31:43,  3.99s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-                                                                                                                                                                             
-{'loss': 0.1758, 'grad_norm': 4.214677810668945, 'learning_rate': 6.637333333333333e-06, 'epoch': 0.38}
-{'loss': 0.1972, 'grad_norm': 4.144543647766113, 'learning_rate': 6.604000000000001e-06, 'epoch': 0.38}
-{'loss': 0.1293, 'grad_norm': 2.1775295734405518, 'learning_rate': 6.570666666666667e-06, 'epoch': 0.38}
-{'loss': 0.099, 'grad_norm': 2.796152353286743, 'learning_rate': 6.537333333333334e-06, 'epoch': 0.39}
-{'loss': 0.0945, 'grad_norm': 2.1920204162597656, 'learning_rate': 6.504e-06, 'epoch': 0.39}
-{'loss': 0.1118, 'grad_norm': 2.8689582347869873, 'learning_rate': 6.470666666666667e-06, 'epoch': 0.39}
-{'loss': 0.1732, 'grad_norm': 3.580993175506592, 'learning_rate': 6.4373333333333344e-06, 'epoch': 0.4}
-{'loss': 0.1581, 'grad_norm': 3.9165573120117188, 'learning_rate': 6.404e-06, 'epoch': 0.4}
-{'loss': 0.1716, 'grad_norm': 3.8235292434692383, 'learning_rate': 6.370666666666667e-06, 'epoch': 0.4}
-{'loss': 0.1364, 'grad_norm': 3.21138072013855, 'learning_rate': 6.3373333333333345e-06, 'epoch': 0.41}
-{'loss': 0.1459, 'grad_norm': 3.925539255142212, 'learning_rate': 6.304e-06, 'epoch': 0.41}
-{'loss': 0.1668, 'grad_norm': 3.062764883041382, 'learning_rate': 6.270666666666667e-06, 'epoch': 0.41}
-{'loss': 0.1243, 'grad_norm': 2.8379392623901367, 'learning_rate': 6.237333333333334e-06, 'epoch': 0.42}
-{'loss': 0.0979, 'grad_norm': 2.979661226272583, 'learning_rate': 6.204e-06, 'epoch': 0.42}
-{'loss': 0.0848, 'grad_norm': 2.4838883876800537, 'learning_rate': 6.170666666666667e-06, 'epoch': 0.42}
-{'loss': 0.0927, 'grad_norm': 2.3293073177337646, 'learning_rate': 6.137333333333334e-06, 'epoch': 0.42}
-{'loss': 0.0976, 'grad_norm': 3.3497400283813477, 'learning_rate': 6.104000000000001e-06, 'epoch': 0.43}
-{'loss': 0.0881, 'grad_norm': 2.0302255153656006, 'learning_rate': 6.070666666666667e-06, 'epoch': 0.43}
-{'loss': 0.0828, 'grad_norm': 2.112396001815796, 'learning_rate': 6.037333333333334e-06, 'epoch': 0.43}
-{'loss': 0.0983, 'grad_norm': 2.513197183609009, 'learning_rate': 6.004000000000001e-06, 'epoch': 0.44}
-{'loss': 0.0929, 'grad_norm': 2.1429622173309326, 'learning_rate': 5.970666666666667e-06, 'epoch': 0.44}
-{'loss': 0.0916, 'grad_norm': 2.7300236225128174, 'learning_rate': 5.937333333333334e-06, 'epoch': 0.44}
-{'loss': 0.1426, 'grad_norm': 4.011541366577148, 'learning_rate': 5.9040000000000006e-06, 'epoch': 0.45}
-{'loss': 0.163, 'grad_norm': 3.1994545459747314, 'learning_rate': 5.870666666666667e-06, 'epoch': 0.45}
-{'loss': 0.1568, 'grad_norm': 2.98388934135437, 'learning_rate': 5.837333333333333e-06, 'epoch': 0.45}
-{'loss': 0.0937, 'grad_norm': 2.4515798091888428, 'learning_rate': 5.804000000000001e-06, 'epoch': 0.46}
-{'loss': 0.0861, 'grad_norm': 2.0767834186553955, 'learning_rate': 5.770666666666666e-06, 'epoch': 0.46}
-{'loss': 0.0917, 'grad_norm': 2.601104974746704, 'learning_rate': 5.7373333333333335e-06, 'epoch': 0.46}
-{'loss': 0.1022, 'grad_norm': 2.593489408493042, 'learning_rate': 5.704000000000001e-06, 'epoch': 0.47}
-{'loss': 0.1304, 'grad_norm': 3.5832834243774414, 'learning_rate': 5.670666666666668e-06, 'epoch': 0.47}
-{'loss': 0.1634, 'grad_norm': 3.4403560161590576, 'learning_rate': 5.637333333333334e-06, 'epoch': 0.47}
-{'loss': 0.1683, 'grad_norm': 3.6842737197875977, 'learning_rate': 5.604000000000001e-06, 'epoch': 0.47}
-{'loss': 0.1538, 'grad_norm': 3.8382315635681152, 'learning_rate': 5.570666666666667e-06, 'epoch': 0.48}
-{'loss': 0.165, 'grad_norm': 4.207257270812988, 'learning_rate': 5.537333333333334e-06, 'epoch': 0.48}
-{'loss': 0.1558, 'grad_norm': 2.4130444526672363, 'learning_rate': 5.504e-06, 'epoch': 0.48}
-{'loss': 0.1096, 'grad_norm': 2.3981151580810547, 'learning_rate': 5.4706666666666674e-06, 'epoch': 0.49}
-{'loss': 0.0937, 'grad_norm': 2.2837915420532227, 'learning_rate': 5.437333333333333e-06, 'epoch': 0.49}
-{'loss': 0.0876, 'grad_norm': 2.6647775173187256, 'learning_rate': 5.404e-06, 'epoch': 0.49}
-{'loss': 0.15, 'grad_norm': 3.7677643299102783, 'learning_rate': 5.3706666666666675e-06, 'epoch': 0.5}
-{'loss': 0.1925, 'grad_norm': 3.542175769805908, 'learning_rate': 5.337333333333333e-06, 'epoch': 0.5}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 16:54:50,654 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 16:54:50,654 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 16:54:58,812 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:54:58,999 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:00,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:01,715 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:03,007 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:04,217 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:05,579 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:06,663 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:07,716 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:08,870 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:09,965 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:11,072 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:12,108 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:13,097 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:14,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:15,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:15,989 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:16,893 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:17,929 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:18,887 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:19,797 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:20,802 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:21,787 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:22,749 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:23,731 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:24,709 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:25,688 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:26,745 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:27,889 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:28,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:29,817 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:30,877 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:31,914 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:32,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:34,005 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:35,132 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:36,186 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:37,147 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:38,209 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:39,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:40,203 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:41,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:42,259 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:43,227 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:44,221 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:45,189 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:46,268 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:47,252 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:48,255 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:49,402 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:50,418 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:51,459 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:52,515 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:53,447 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:54,476 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:55,511 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:56,620 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:57,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:58,475 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:55:59,523 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:00,465 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:01,524 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:02,521 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:03,583 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:04,595 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:05,503 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:06,411 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:07,417 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:08,417 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:09,431 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:10,421 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:11,400 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:12,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:13,455 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:14,528 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:15,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:16,543 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:17,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:18,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:19,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:20,442 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:21,387 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:22,327 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:23,330 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:24,372 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:25,405 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:26,355 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:27,363 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:28,457 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:29,472 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:30,595 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:31,624 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:32,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:33,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:34,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:35,646 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:36,674 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:37,687 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:38,697 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:39,660 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:40,685 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:41,723 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:42,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:43,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:44,867 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:45,989 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:47,016 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:48,077 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:49,183 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:50,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:51,120 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:52,155 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:53,192 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:54,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:55,101 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:56,092 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:57,046 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:58,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:56:59,145 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:00,139 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:01,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:02,173 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:03,111 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:04,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:05,210 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:06,183 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:07,142 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:08,122 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:09,142 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:10,120 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:11,120 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:12,097 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 16:57:13,020 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
- 50%|████████████████████████████████████████████████████████████████                                                                | 4000/8000 [1:30:10<1:15:16,  1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:57:21,319 >> Saving model checkpoint to ./checkpoint-4000
-{'eval_loss': 0.25648659467697144, 'eval_wer': 12.7372160418809, 'eval_runtime': 150.6646, 'eval_samples_per_second': 13.965, 'eval_steps_per_second': 0.876, 'epoch': 0.5}
-[INFO|configuration_utils.py:423] 2025-02-12 16:57:21,320 >> Configuration saved in ./checkpoint-4000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 16:57:21,321 >> Configuration saved in ./checkpoint-4000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 16:57:22,731 >> Model weights saved in ./checkpoint-4000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 16:57:22,732 >> Feature extractor saved in ./checkpoint-4000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 16:57:26,282 >> Feature extractor saved in ./preprocessor_config.json
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
- 50%|███████████████████████████████████████████████████████████████▌                                                               | 4001/8000 [1:30:19<54:04:14, 48.68s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 16:57:30 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
- 50%|████████████████████████████████████████████████████████████████▏                                                               | 4008/8000 [1:30:28<5:43:05,  5.16s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
- 57%|█████████████████████████████████████████████████████████████████████████▌                                                      | 4600/8000 [1:43:49<1:03:53,  1.13s/it]'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 415cf487-aa47-4982-8de5-bb40b3cb3a69)')' thrown while requesting GET https://huggingface.co/datasets/asierhv/composite_corpus_eu_v2.1/resolve/2c2153d993ce951528b5b62eb207632c6d267c29/data/train-00018-of-00150.parquet
-{'loss': 0.1434, 'grad_norm': 2.5672571659088135, 'learning_rate': 5.304e-06, 'epoch': 0.5}
-{'loss': 0.2075, 'grad_norm': 4.591808319091797, 'learning_rate': 5.270666666666668e-06, 'epoch': 0.51}
-{'loss': 0.1478, 'grad_norm': 3.485185146331787, 'learning_rate': 5.237333333333334e-06, 'epoch': 0.51}
-{'loss': 0.1383, 'grad_norm': 2.5995991230010986, 'learning_rate': 5.2040000000000005e-06, 'epoch': 0.51}
-{'loss': 0.0959, 'grad_norm': 2.4682819843292236, 'learning_rate': 5.170666666666667e-06, 'epoch': 0.52}
-{'loss': 0.0857, 'grad_norm': 2.436518669128418, 'learning_rate': 5.137333333333334e-06, 'epoch': 0.52}
-{'loss': 0.0862, 'grad_norm': 2.0344107151031494, 'learning_rate': 5.104e-06, 'epoch': 0.52}
-{'loss': 0.0808, 'grad_norm': 1.6771937608718872, 'learning_rate': 5.070666666666667e-06, 'epoch': 0.53}
-{'loss': 0.0872, 'grad_norm': 1.7831439971923828, 'learning_rate': 5.037333333333334e-06, 'epoch': 0.53}
-{'loss': 0.0832, 'grad_norm': 2.228795051574707, 'learning_rate': 5.004e-06, 'epoch': 0.53}
-{'loss': 0.0927, 'grad_norm': 3.1402647495269775, 'learning_rate': 4.970666666666667e-06, 'epoch': 0.53}
-{'loss': 0.1477, 'grad_norm': 3.662506580352783, 'learning_rate': 4.937333333333334e-06, 'epoch': 0.54}
-{'loss': 0.1262, 'grad_norm': 2.865934371948242, 'learning_rate': 4.904000000000001e-06, 'epoch': 0.54}
-{'loss': 0.1329, 'grad_norm': 3.2233200073242188, 'learning_rate': 4.870666666666667e-06, 'epoch': 0.54}
-{'loss': 0.0795, 'grad_norm': 2.093703269958496, 'learning_rate': 4.837333333333334e-06, 'epoch': 0.55}
-{'loss': 0.0715, 'grad_norm': 1.7601807117462158, 'learning_rate': 4.804e-06, 'epoch': 0.55}
-{'loss': 0.0797, 'grad_norm': 2.1606643199920654, 'learning_rate': 4.770666666666667e-06, 'epoch': 0.55}
-{'loss': 0.0883, 'grad_norm': 2.565343141555786, 'learning_rate': 4.737333333333334e-06, 'epoch': 0.56}
-{'loss': 0.0965, 'grad_norm': 2.062619924545288, 'learning_rate': 4.704e-06, 'epoch': 0.56}
-{'loss': 0.0891, 'grad_norm': 2.2219879627227783, 'learning_rate': 4.6706666666666675e-06, 'epoch': 0.56}
-{'loss': 0.1147, 'grad_norm': 2.857029676437378, 'learning_rate': 4.637333333333334e-06, 'epoch': 0.57}
-{'loss': 0.144, 'grad_norm': 3.090247392654419, 'learning_rate': 4.604e-06, 'epoch': 0.57}
-{'loss': 0.1451, 'grad_norm': 3.8906264305114746, 'learning_rate': 4.570666666666667e-06, 'epoch': 0.57}
-{'loss': 0.1475, 'grad_norm': 3.7733590602874756, 'learning_rate': 4.537333333333334e-06, 'epoch': 0.57}
-02/12/2025 17:11:00 - WARNING - huggingface_hub.utils._http - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 415cf487-aa47-4982-8de5-bb40b3cb3a69)')' thrown while requesting GET https://huggingface.co/datasets/asierhv/composite_corpus_eu_v2.1/resolve/2c2153d993ce951528b5b62eb207632c6d267c29/data/train-00018-of-00150.parquet
-Retrying in 1s [Retry 1/5].
-02/12/2025 17:11:00 - WARNING - huggingface_hub.utils._http - Retrying in 1s [Retry 1/5].
- 62%|█████████████████████████████████████████████████████████████████████████████████▎                                                | 5000/8000 [1:52:57<58:08,  1.16s/it][INFO|trainer.py:4176] 2025-02-12 17:20:08,509 >>
-{'loss': 0.1509, 'grad_norm': 3.379163980484009, 'learning_rate': 4.504e-06, 'epoch': 0.58}
-{'loss': 0.1444, 'grad_norm': 3.4210824966430664, 'learning_rate': 4.470666666666667e-06, 'epoch': 0.58}
-{'loss': 0.1295, 'grad_norm': 3.7809910774230957, 'learning_rate': 4.437333333333333e-06, 'epoch': 0.58}
-{'loss': 0.1158, 'grad_norm': 2.537574052810669, 'learning_rate': 4.4040000000000005e-06, 'epoch': 0.59}
-{'loss': 0.1249, 'grad_norm': 3.482285261154175, 'learning_rate': 4.370666666666667e-06, 'epoch': 0.59}
-{'loss': 0.1238, 'grad_norm': 3.0114011764526367, 'learning_rate': 4.337333333333334e-06, 'epoch': 0.59}
-{'loss': 0.0888, 'grad_norm': 2.117215394973755, 'learning_rate': 4.304000000000001e-06, 'epoch': 0.6}
-{'loss': 0.0972, 'grad_norm': 2.0158379077911377, 'learning_rate': 4.270666666666667e-06, 'epoch': 0.6}
-{'loss': 0.0793, 'grad_norm': 2.5208640098571777, 'learning_rate': 4.2373333333333335e-06, 'epoch': 0.6}
-{'loss': 0.1035, 'grad_norm': 2.820002555847168, 'learning_rate': 4.204e-06, 'epoch': 0.61}
-{'loss': 0.1128, 'grad_norm': 3.1144282817840576, 'learning_rate': 4.170666666666667e-06, 'epoch': 0.61}
-{'loss': 0.1217, 'grad_norm': 3.1345527172088623, 'learning_rate': 4.137333333333334e-06, 'epoch': 0.61}
-{'loss': 0.1061, 'grad_norm': 2.2702696323394775, 'learning_rate': 4.104e-06, 'epoch': 0.62}
-{'loss': 0.0919, 'grad_norm': 2.714102268218994, 'learning_rate': 4.072e-06, 'epoch': 0.62}
-{'loss': 0.0855, 'grad_norm': 2.448854923248291, 'learning_rate': 4.0386666666666666e-06, 'epoch': 0.62}
-{'loss': 0.0818, 'grad_norm': 2.9392127990722656, 'learning_rate': 4.005333333333334e-06, 'epoch': 0.62}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 17:20:08,509 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 17:20:08,509 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 17:20:24,466 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:24,644 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:25,696 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:27,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:28,226 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:29,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:30,337 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:31,421 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:32,606 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:33,725 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:34,928 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:36,023 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:37,063 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:38,060 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:38,972 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:40,096 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:41,013 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:41,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:42,954 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:43,909 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:44,822 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:45,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:46,805 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:47,791 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:48,772 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:49,751 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:50,777 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:51,848 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:53,018 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:53,949 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:54,953 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:56,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:57,050 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:58,105 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:20:59,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:00,186 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:01,253 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:02,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:03,298 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:04,334 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:06,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:07,528 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:08,547 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:09,474 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:10,457 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:11,438 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:12,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:13,546 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:14,512 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:15,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:16,624 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:17,677 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:18,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:19,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:20,847 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:21,908 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:23,107 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:24,023 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:24,971 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:26,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:26,981 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:28,042 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:29,030 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:30,075 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:31,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:32,046 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:32,943 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:33,966 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:34,964 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:35,964 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:36,954 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:37,941 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:39,031 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:40,029 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:41,103 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:42,101 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:43,121 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:44,058 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:45,056 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:46,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:47,057 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:48,025 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:48,993 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:50,011 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:51,071 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:52,108 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:53,099 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:54,122 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:55,245 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:56,295 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:57,450 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:58,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:21:59,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:00,457 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:01,456 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:02,577 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:03,618 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:04,668 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:05,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:06,874 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:07,937 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:08,961 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:10,020 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:10,991 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:12,065 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:13,172 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:14,180 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:15,210 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:16,291 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:17,283 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:18,236 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:19,270 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:20,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:21,261 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:22,220 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:23,229 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:24,175 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:25,245 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:26,288 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:27,280 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:28,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:29,282 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:30,210 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:31,251 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:32,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:33,232 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:34,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:35,172 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:36,205 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:37,205 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:38,228 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:39,232 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:22:40,181 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
- 62%|█████████████████████████████████████████████████████████████████████████████████▎                                                | 5000/8000 [1:55:37<58:08,  1.16s/it][INFO|trainer.py:3860] 2025-02-12 17:22:48,522 >> Saving model checkpoint to ./checkpoint-5000
-{'eval_loss': 0.2562941014766693, 'eval_wer': 11.942600729176405, 'eval_runtime': 160.0125, 'eval_samples_per_second': 13.149, 'eval_steps_per_second': 0.825, 'epoch': 0.62}
-[INFO|configuration_utils.py:423] 2025-02-12 17:22:48,523 >> Configuration saved in ./checkpoint-5000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 17:22:48,524 >> Configuration saved in ./checkpoint-5000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 17:22:49,924 >> Model weights saved in ./checkpoint-5000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 17:22:49,926 >> Feature extractor saved in ./checkpoint-5000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 17:22:53,468 >> Feature extractor saved in ./preprocessor_config.json
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
- 63%|███████████████████████████████████████████████████████████████████████████████▍                                               | 5001/8000 [1:55:46<42:48:38, 51.39s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 17:22:57 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
- 63%|████████████████████████████████████████████████████████████████████████████████                                                | 5006/8000 [1:55:53<8:05:45,  9.73s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-                                                                                                                                                                             
-{'loss': 0.1203, 'grad_norm': 2.4964210987091064, 'learning_rate': 3.972e-06, 'epoch': 0.63}
-{'loss': 0.111, 'grad_norm': 3.330078125, 'learning_rate': 3.938666666666667e-06, 'epoch': 0.63}
-{'loss': 0.164, 'grad_norm': 3.6872191429138184, 'learning_rate': 3.905333333333334e-06, 'epoch': 0.63}
-{'loss': 0.1515, 'grad_norm': 3.728769063949585, 'learning_rate': 3.872e-06, 'epoch': 0.64}
-{'loss': 0.1334, 'grad_norm': 3.4183156490325928, 'learning_rate': 3.838666666666667e-06, 'epoch': 0.64}
-{'loss': 0.134, 'grad_norm': 3.4580440521240234, 'learning_rate': 3.8053333333333336e-06, 'epoch': 0.64}
-{'loss': 0.1088, 'grad_norm': 2.2719855308532715, 'learning_rate': 3.772e-06, 'epoch': 0.65}
-{'loss': 0.0724, 'grad_norm': 2.3186910152435303, 'learning_rate': 3.7386666666666673e-06, 'epoch': 0.65}
-{'loss': 0.0759, 'grad_norm': 1.8175565004348755, 'learning_rate': 3.7053333333333337e-06, 'epoch': 0.65}
-{'loss': 0.0813, 'grad_norm': 2.0874826908111572, 'learning_rate': 3.6720000000000006e-06, 'epoch': 0.66}
-{'loss': 0.0824, 'grad_norm': 1.9950120449066162, 'learning_rate': 3.638666666666667e-06, 'epoch': 0.66}
-{'loss': 0.0835, 'grad_norm': 2.6349194049835205, 'learning_rate': 3.6053333333333334e-06, 'epoch': 0.66}
-{'loss': 0.0823, 'grad_norm': 2.7667415142059326, 'learning_rate': 3.5720000000000003e-06, 'epoch': 0.67}
-{'loss': 0.1077, 'grad_norm': 3.617748260498047, 'learning_rate': 3.538666666666667e-06, 'epoch': 0.67}
-{'loss': 0.1268, 'grad_norm': 3.2603073120117188, 'learning_rate': 3.5053333333333335e-06, 'epoch': 0.67}
-{'loss': 0.1206, 'grad_norm': 2.9681355953216553, 'learning_rate': 3.4720000000000004e-06, 'epoch': 0.68}
-{'loss': 0.1279, 'grad_norm': 4.156548500061035, 'learning_rate': 3.438666666666667e-06, 'epoch': 0.68}
-{'loss': 0.1177, 'grad_norm': 3.2013888359069824, 'learning_rate': 3.4053333333333337e-06, 'epoch': 0.68}
-{'loss': 0.0946, 'grad_norm': 3.299403190612793, 'learning_rate': 3.372e-06, 'epoch': 0.68}
-{'loss': 0.0944, 'grad_norm': 2.39630389213562, 'learning_rate': 3.338666666666667e-06, 'epoch': 0.69}
-{'loss': 0.1149, 'grad_norm': 3.7624928951263428, 'learning_rate': 3.3053333333333338e-06, 'epoch': 0.69}
-{'loss': 0.1373, 'grad_norm': 3.3170886039733887, 'learning_rate': 3.272e-06, 'epoch': 0.69}
-{'loss': 0.1056, 'grad_norm': 2.2296531200408936, 'learning_rate': 3.238666666666667e-06, 'epoch': 0.7}
-{'loss': 0.0724, 'grad_norm': 1.8995999097824097, 'learning_rate': 3.2053333333333334e-06, 'epoch': 0.7}
-{'loss': 0.0604, 'grad_norm': 2.3782520294189453, 'learning_rate': 3.172e-06, 'epoch': 0.7}
-{'loss': 0.0581, 'grad_norm': 2.2558810710906982, 'learning_rate': 3.138666666666667e-06, 'epoch': 0.71}
-{'loss': 0.0713, 'grad_norm': 2.4040448665618896, 'learning_rate': 3.1053333333333336e-06, 'epoch': 0.71}
-{'loss': 0.0773, 'grad_norm': 2.5696732997894287, 'learning_rate': 3.072e-06, 'epoch': 0.71}
-{'loss': 0.0765, 'grad_norm': 2.237166404724121, 'learning_rate': 3.038666666666667e-06, 'epoch': 0.72}
-{'loss': 0.0779, 'grad_norm': 1.8783671855926514, 'learning_rate': 3.0053333333333332e-06, 'epoch': 0.72}
-{'loss': 0.0751, 'grad_norm': 2.096334457397461, 'learning_rate': 2.9720000000000005e-06, 'epoch': 0.72}
-{'loss': 0.0711, 'grad_norm': 2.0362164974212646, 'learning_rate': 2.938666666666667e-06, 'epoch': 0.72}
-{'loss': 0.0635, 'grad_norm': 1.7136311531066895, 'learning_rate': 2.9053333333333334e-06, 'epoch': 0.73}
-{'loss': 0.0698, 'grad_norm': 2.754848003387451, 'learning_rate': 2.872e-06, 'epoch': 0.73}
-{'loss': 0.0741, 'grad_norm': 2.058065176010132, 'learning_rate': 2.8386666666666666e-06, 'epoch': 0.73}
-{'loss': 0.0938, 'grad_norm': 3.0389583110809326, 'learning_rate': 2.805333333333334e-06, 'epoch': 0.74}
-{'loss': 0.1387, 'grad_norm': 3.4811720848083496, 'learning_rate': 2.7720000000000003e-06, 'epoch': 0.74}
-{'loss': 0.1283, 'grad_norm': 3.2388477325439453, 'learning_rate': 2.7386666666666667e-06, 'epoch': 0.74}
-{'loss': 0.1073, 'grad_norm': 3.083925247192383, 'learning_rate': 2.7053333333333336e-06, 'epoch': 0.75}
-{'loss': 0.1038, 'grad_norm': 2.6847918033599854, 'learning_rate': 2.672e-06, 'epoch': 0.75}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 17:43:07,939 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 17:43:07,939 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 17:43:22,192 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:22,365 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:23,580 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:24,998 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:26,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:27,276 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:28,426 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:29,638 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:30,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:31,992 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:33,202 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:34,361 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:35,413 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:36,412 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:37,319 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:38,399 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:39,321 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:40,237 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:41,294 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:42,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:43,182 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:44,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:45,180 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:46,166 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:47,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:48,155 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:49,156 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:50,230 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:51,385 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:52,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:53,343 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:54,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:55,475 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:56,547 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:57,541 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:58,648 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:43:59,691 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:00,661 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:01,761 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:02,783 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:03,752 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:04,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:05,767 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:06,710 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:07,708 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:08,679 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:09,757 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:10,772 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:11,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:12,871 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:13,884 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:14,922 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:15,966 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:16,906 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:17,924 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:18,944 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:20,063 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:21,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:21,957 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:23,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:23,958 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:25,029 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:26,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:27,079 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:28,098 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:29,042 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:29,943 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:30,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:32,029 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:33,081 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:34,111 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:35,117 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:36,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:37,255 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:38,335 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:39,362 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:40,399 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:41,342 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:42,334 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:43,368 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:44,346 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:45,347 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:46,329 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:47,362 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:48,419 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:49,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:50,453 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:51,480 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:52,610 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:53,644 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:54,788 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:55,817 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:56,829 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:57,839 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:58,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:44:59,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:01,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:02,055 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:03,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:04,056 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:05,098 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:06,126 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:07,194 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:08,191 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:09,283 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:10,424 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:11,448 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:12,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:13,598 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:14,587 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:15,548 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:16,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:17,638 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:18,603 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:19,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:20,634 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:21,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:22,663 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:23,696 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:24,697 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:25,694 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:26,730 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:27,669 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:28,722 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:29,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:30,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:31,713 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:32,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:33,710 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:34,712 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:35,718 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:36,698 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 17:45:37,644 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
- 75%|█████████████████████████████████████████████████████████████████████████████████████████████████▌                                | 6000/8000 [2:18:35<38:52,  1.17s/it][INFO|trainer.py:3860] 2025-02-12 17:45:46,009 >> Saving model checkpoint to ./checkpoint-6000
-{'eval_loss': 0.23902159929275513, 'eval_wer': 11.07319809292325, 'eval_runtime': 158.0693, 'eval_samples_per_second': 13.311, 'eval_steps_per_second': 0.835, 'epoch': 0.75}
-[INFO|configuration_utils.py:423] 2025-02-12 17:45:46,010 >> Configuration saved in ./checkpoint-6000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 17:45:46,011 >> Configuration saved in ./checkpoint-6000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 17:45:47,414 >> Model weights saved in ./checkpoint-6000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 17:45:47,415 >> Feature extractor saved in ./checkpoint-6000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 17:45:50,918 >> Feature extractor saved in ./preprocessor_config.json
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
- 75%|███████████████████████████████████████████████████████████████████████████████████████████████▎                               | 6002/8000 [2:18:44<19:50:45, 35.76s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 17:45:56 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
- 75%|█████████████████████████████████████████████████████████████████████████████████████████████████▊                                | 6017/8000 [2:19:02<44:13,  1.34s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-                                                                                                                                                                             
-{'loss': 0.0987, 'grad_norm': 2.7315189838409424, 'learning_rate': 2.6386666666666673e-06, 'epoch': 0.75}
-{'loss': 0.0858, 'grad_norm': 2.3389735221862793, 'learning_rate': 2.6053333333333337e-06, 'epoch': 0.76}
-{'loss': 0.0764, 'grad_norm': 1.982534646987915, 'learning_rate': 2.572e-06, 'epoch': 0.76}
-{'loss': 0.0731, 'grad_norm': 1.9040074348449707, 'learning_rate': 2.538666666666667e-06, 'epoch': 0.76}
-{'loss': 0.0758, 'grad_norm': 2.654710054397583, 'learning_rate': 2.5053333333333334e-06, 'epoch': 0.77}
-{'loss': 0.0824, 'grad_norm': 2.6400296688079834, 'learning_rate': 2.4720000000000002e-06, 'epoch': 0.77}
-{'loss': 0.0822, 'grad_norm': 7.269197463989258, 'learning_rate': 2.438666666666667e-06, 'epoch': 0.77}
-{'loss': 0.0818, 'grad_norm': 2.363656520843506, 'learning_rate': 2.4053333333333335e-06, 'epoch': 0.78}
-{'loss': 0.0768, 'grad_norm': 2.4660115242004395, 'learning_rate': 2.3720000000000003e-06, 'epoch': 0.78}
-{'loss': 0.0783, 'grad_norm': 3.3116371631622314, 'learning_rate': 2.3386666666666668e-06, 'epoch': 0.78}
-{'loss': 0.0899, 'grad_norm': 2.595853090286255, 'learning_rate': 2.3053333333333336e-06, 'epoch': 0.78}
-{'loss': 0.0953, 'grad_norm': 2.709597587585449, 'learning_rate': 2.2720000000000004e-06, 'epoch': 0.79}
-{'loss': 0.1249, 'grad_norm': 2.4446637630462646, 'learning_rate': 2.238666666666667e-06, 'epoch': 0.79}
-{'loss': 0.1171, 'grad_norm': 3.4412341117858887, 'learning_rate': 2.2053333333333333e-06, 'epoch': 0.79}
-{'loss': 0.1065, 'grad_norm': 2.2719008922576904, 'learning_rate': 2.172e-06, 'epoch': 0.8}
-{'loss': 0.0872, 'grad_norm': 1.9873290061950684, 'learning_rate': 2.138666666666667e-06, 'epoch': 0.8}
-{'loss': 0.0765, 'grad_norm': 2.487403392791748, 'learning_rate': 2.1053333333333334e-06, 'epoch': 0.8}
-{'loss': 0.0736, 'grad_norm': 2.4424736499786377, 'learning_rate': 2.0720000000000002e-06, 'epoch': 0.81}
-{'loss': 0.1064, 'grad_norm': 3.1507577896118164, 'learning_rate': 2.0386666666666667e-06, 'epoch': 0.81}
-{'loss': 0.0993, 'grad_norm': 2.6285648345947266, 'learning_rate': 2.0053333333333335e-06, 'epoch': 0.81}
-{'loss': 0.1299, 'grad_norm': 4.1934967041015625, 'learning_rate': 1.972e-06, 'epoch': 0.82}
-{'loss': 0.1195, 'grad_norm': 3.031852960586548, 'learning_rate': 1.9386666666666668e-06, 'epoch': 0.82}
-{'loss': 0.1197, 'grad_norm': 2.9288837909698486, 'learning_rate': 1.9053333333333334e-06, 'epoch': 0.82}
-{'loss': 0.1127, 'grad_norm': 2.890054225921631, 'learning_rate': 1.8720000000000002e-06, 'epoch': 0.82}
-{'loss': 0.1155, 'grad_norm': 3.130406618118286, 'learning_rate': 1.8386666666666669e-06, 'epoch': 0.83}
-{'loss': 0.1291, 'grad_norm': 2.7169485092163086, 'learning_rate': 1.8053333333333333e-06, 'epoch': 0.83}
-{'loss': 0.1097, 'grad_norm': 2.7390034198760986, 'learning_rate': 1.7720000000000001e-06, 'epoch': 0.83}
-{'loss': 0.1022, 'grad_norm': 2.161604166030884, 'learning_rate': 1.7386666666666668e-06, 'epoch': 0.84}
-{'loss': 0.0779, 'grad_norm': 2.210451126098633, 'learning_rate': 1.7053333333333336e-06, 'epoch': 0.84}
-{'loss': 0.0728, 'grad_norm': 2.426438808441162, 'learning_rate': 1.672e-06, 'epoch': 0.84}
-{'loss': 0.0859, 'grad_norm': 2.8744237422943115, 'learning_rate': 1.6386666666666667e-06, 'epoch': 0.85}
-{'loss': 0.1496, 'grad_norm': 2.8165483474731445, 'learning_rate': 1.6053333333333335e-06, 'epoch': 0.85}
-{'loss': 0.1343, 'grad_norm': 4.0077738761901855, 'learning_rate': 1.5720000000000002e-06, 'epoch': 0.85}
-{'loss': 0.1397, 'grad_norm': 3.8011586666107178, 'learning_rate': 1.538666666666667e-06, 'epoch': 0.86}
-{'loss': 0.1262, 'grad_norm': 2.7379047870635986, 'learning_rate': 1.5053333333333334e-06, 'epoch': 0.86}
-{'loss': 0.1188, 'grad_norm': 3.250950574874878, 'learning_rate': 1.472e-06, 'epoch': 0.86}
-{'loss': 0.1103, 'grad_norm': 2.782945156097412, 'learning_rate': 1.438666666666667e-06, 'epoch': 0.87}
-{'loss': 0.1147, 'grad_norm': 3.08154034614563, 'learning_rate': 1.4053333333333335e-06, 'epoch': 0.87}
-{'loss': 0.1332, 'grad_norm': 3.5768070220947266, 'learning_rate': 1.372e-06, 'epoch': 0.87}
-{'loss': 0.1282, 'grad_norm': 3.155341863632202, 'learning_rate': 1.3386666666666668e-06, 'epoch': 0.88}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 18:05:21,315 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 18:05:21,315 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 18:05:29,617 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:29,786 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:31,140 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:32,626 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:34,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:35,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:36,498 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:37,606 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:38,714 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:39,829 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:40,947 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:41,951 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:43,000 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:43,995 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:46,219 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:47,289 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:48,188 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:49,089 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:50,124 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:51,096 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:52,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:53,010 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:54,011 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:54,997 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:56,008 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:56,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:57,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:05:59,069 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:00,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:01,182 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:02,235 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:03,336 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:04,394 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:05,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:06,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:07,602 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:08,666 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:09,635 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:10,772 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:11,802 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:14,040 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:15,034 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:16,090 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:17,033 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:18,017 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:18,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:20,075 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:21,080 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:22,056 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:23,182 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:24,195 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:25,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:26,311 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:27,249 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:28,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:29,277 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:30,376 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:31,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:32,249 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:33,311 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:34,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:35,325 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:36,313 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:37,365 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:38,382 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:39,310 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:40,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:41,232 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:42,247 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:43,299 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:44,301 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:45,300 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:46,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:47,435 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:48,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:49,528 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:50,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:51,520 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:52,499 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:53,512 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:54,484 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:55,473 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:56,427 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:57,442 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:58,505 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:06:59,556 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:00,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:01,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:02,709 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:03,769 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:04,936 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:05,980 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:07,015 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:08,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:08,995 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:10,103 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:11,162 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:12,199 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:13,229 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:14,216 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:15,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:16,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:17,337 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:18,322 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:19,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:20,559 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:21,607 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:22,656 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:23,769 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:24,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:25,710 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:26,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:27,805 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:28,761 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:29,703 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:30,724 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:31,673 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:32,752 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:33,806 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:34,818 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:35,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:36,851 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:37,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:38,853 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:39,863 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:40,840 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:41,815 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:42,803 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:43,835 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:44,832 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:45,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:46,820 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:07:47,748 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
- 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                | 7000/8000 [2:40:45<18:54,  1.13s/it][INFO|trainer.py:3860] 2025-02-12 18:07:56,147 >> Saving model checkpoint to ./checkpoint-7000
-{'eval_loss': 0.23438745737075806, 'eval_wer': 11.391044218005048, 'eval_runtime': 154.8314, 'eval_samples_per_second': 13.589, 'eval_steps_per_second': 0.853, 'epoch': 0.88}
-[INFO|configuration_utils.py:423] 2025-02-12 18:07:56,148 >> Configuration saved in ./checkpoint-7000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 18:07:56,149 >> Configuration saved in ./checkpoint-7000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 18:07:57,530 >> Model weights saved in ./checkpoint-7000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 18:07:57,531 >> Feature extractor saved in ./checkpoint-7000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 18:08:00,757 >> Feature extractor saved in ./preprocessor_config.json
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
-  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
- 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 7001/8000 [2:40:54<13:49:55, 49.85s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 18:08:05 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
- 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████                | 7005/8000 [2:41:00<3:37:53, 13.14s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-                                                                                                                                                                             
-{'loss': 0.2406, 'grad_norm': 11.062019348144531, 'learning_rate': 1.308e-06, 'epoch': 0.88}
-{'loss': 0.3469, 'grad_norm': 4.648179531097412, 'learning_rate': 1.2746666666666669e-06, 'epoch': 0.88}
-{'loss': 0.3421, 'grad_norm': 4.388245105743408, 'learning_rate': 1.2413333333333335e-06, 'epoch': 0.88}
-{'loss': 0.2847, 'grad_norm': 4.806427478790283, 'learning_rate': 1.2080000000000001e-06, 'epoch': 0.89}
-{'loss': 0.1671, 'grad_norm': 3.0818049907684326, 'learning_rate': 1.1746666666666668e-06, 'epoch': 0.89}
-{'loss': 0.1313, 'grad_norm': 4.117819309234619, 'learning_rate': 1.1413333333333334e-06, 'epoch': 0.89}
-{'loss': 0.1177, 'grad_norm': 2.8558835983276367, 'learning_rate': 1.108e-06, 'epoch': 0.9}
-{'loss': 0.0911, 'grad_norm': 3.0425021648406982, 'learning_rate': 1.0746666666666669e-06, 'epoch': 0.9}
-{'loss': 0.0898, 'grad_norm': 2.6587588787078857, 'learning_rate': 1.0413333333333333e-06, 'epoch': 0.9}
-{'loss': 0.0922, 'grad_norm': 1.7572664022445679, 'learning_rate': 1.0080000000000001e-06, 'epoch': 0.91}
-{'loss': 0.0753, 'grad_norm': 2.00393009185791, 'learning_rate': 9.746666666666668e-07, 'epoch': 0.91}
-{'loss': 0.0628, 'grad_norm': 1.845981478691101, 'learning_rate': 9.413333333333334e-07, 'epoch': 0.91}
-{'loss': 0.0696, 'grad_norm': 2.008112907409668, 'learning_rate': 9.080000000000001e-07, 'epoch': 0.92}
-{'loss': 0.0897, 'grad_norm': 2.837357759475708, 'learning_rate': 8.746666666666668e-07, 'epoch': 0.92}
-{'loss': 0.1227, 'grad_norm': 2.4842417240142822, 'learning_rate': 8.413333333333334e-07, 'epoch': 0.92}
-{'loss': 0.1012, 'grad_norm': 2.7866716384887695, 'learning_rate': 8.08e-07, 'epoch': 0.93}
-{'loss': 0.1141, 'grad_norm': 2.1826930046081543, 'learning_rate': 7.746666666666668e-07, 'epoch': 0.93}
-{'loss': 0.0754, 'grad_norm': 2.014090061187744, 'learning_rate': 7.413333333333333e-07, 'epoch': 0.93}
-{'loss': 0.0736, 'grad_norm': 2.1539175510406494, 'learning_rate': 7.08e-07, 'epoch': 0.93}
-{'loss': 0.0684, 'grad_norm': 2.712541341781616, 'learning_rate': 6.746666666666667e-07, 'epoch': 0.94}
-{'loss': 0.1414, 'grad_norm': 3.281242847442627, 'learning_rate': 6.413333333333334e-07, 'epoch': 0.94}
-{'loss': 0.1895, 'grad_norm': 4.088025093078613, 'learning_rate': 6.08e-07, 'epoch': 0.94}
-{'loss': 0.222, 'grad_norm': 4.144560813903809, 'learning_rate': 5.746666666666667e-07, 'epoch': 0.95}
-{'loss': 0.1349, 'grad_norm': 1.8468823432922363, 'learning_rate': 5.413333333333334e-07, 'epoch': 0.95}
-{'loss': 0.0872, 'grad_norm': 2.5354621410369873, 'learning_rate': 5.08e-07, 'epoch': 0.95}
-{'loss': 0.0725, 'grad_norm': 1.83882737159729, 'learning_rate': 4.746666666666667e-07, 'epoch': 0.96}
-{'loss': 0.0988, 'grad_norm': 3.42556095123291, 'learning_rate': 4.413333333333333e-07, 'epoch': 0.96}
-{'loss': 0.1166, 'grad_norm': 2.682558059692383, 'learning_rate': 4.0800000000000005e-07, 'epoch': 0.96}
-{'loss': 0.1257, 'grad_norm': 3.2471797466278076, 'learning_rate': 3.7466666666666674e-07, 'epoch': 0.97}
-{'loss': 0.1114, 'grad_norm': 2.4202020168304443, 'learning_rate': 3.4133333333333337e-07, 'epoch': 0.97}
-{'loss': 0.0811, 'grad_norm': 2.8282711505889893, 'learning_rate': 3.0800000000000006e-07, 'epoch': 0.97}
-{'loss': 0.104, 'grad_norm': 4.20676326751709, 'learning_rate': 2.746666666666667e-07, 'epoch': 0.97}
-{'loss': 0.2773, 'grad_norm': 4.955998420715332, 'learning_rate': 2.413333333333333e-07, 'epoch': 0.98}
-{'loss': 0.1105, 'grad_norm': 2.0168468952178955, 'learning_rate': 2.08e-07, 'epoch': 0.98}
-{'loss': 0.0808, 'grad_norm': 1.6335862874984741, 'learning_rate': 1.7466666666666667e-07, 'epoch': 0.98}
-{'loss': 0.0786, 'grad_norm': 2.269954204559326, 'learning_rate': 1.4133333333333333e-07, 'epoch': 0.99}
-{'loss': 0.0801, 'grad_norm': 2.0813560485839844, 'learning_rate': 1.0800000000000001e-07, 'epoch': 0.99}
-{'loss': 0.0687, 'grad_norm': 1.6093230247497559, 'learning_rate': 7.466666666666667e-08, 'epoch': 0.99}
-{'loss': 0.0814, 'grad_norm': 1.730695366859436, 'learning_rate': 4.133333333333334e-08, 'epoch': 1.0}
-02/12/2025 18:28:18 - WARNING - huggingface_hub.utils._http - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 009f3961-f00b-4456-8f3b-f5beeed73ba9)')' thrown while requesting GET https://huggingface.co/datasets/asierhv/composite_corpus_eu_v2.1/resolve/2c2153d993ce951528b5b62eb207632c6d267c29/data/train-00011-of-00150.parquet
-Retrying in 1s [Retry 1/5].
-02/12/2025 18:28:18 - WARNING - huggingface_hub.utils._http - Retrying in 1s [Retry 1/5].
-100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [3:01:15<00:00,  1.12s/it][INFO|trainer.py:4176] 2025-02-12 18:28:26,590 >>
-{'loss': 0.0959, 'grad_norm': 3.418311595916748, 'learning_rate': 8e-09, 'epoch': 1.0}
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 18:28:26,591 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 18:28:26,591 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 18:28:36,443 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:36,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:37,929 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:39,629 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:40,907 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:42,147 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:43,290 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:44,383 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:45,515 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:46,714 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:47,807 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:48,974 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:50,030 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:51,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:51,940 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:53,011 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:53,928 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:54,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:55,861 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:56,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:57,733 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:58,729 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:28:59,711 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:00,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:01,667 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:02,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:03,638 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:04,705 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:05,860 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:06,791 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:07,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:08,866 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:09,912 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:10,983 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:11,974 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:13,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:14,105 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:15,079 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:16,146 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:17,157 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:19,369 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:20,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:21,395 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:22,338 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:23,321 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:24,296 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:25,367 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:26,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:27,341 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:28,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:29,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:30,504 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:31,539 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:32,463 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:33,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:34,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:35,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:36,481 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:37,440 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:38,514 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:39,450 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:40,506 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:41,508 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:42,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:43,575 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:44,506 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:45,387 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:46,407 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:47,411 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:48,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:49,418 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:50,409 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:51,497 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:52,490 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:53,574 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:54,590 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:55,627 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:56,552 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:57,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:58,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:29:59,542 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:00,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:01,497 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:02,513 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:03,573 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:04,628 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:05,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:06,649 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:07,783 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:08,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:09,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:11,030 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:12,070 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:13,037 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:14,025 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:15,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:16,199 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:17,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:18,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:19,243 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:20,274 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:21,301 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:22,390 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:23,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:24,445 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:25,581 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:26,601 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:27,726 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:28,833 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:29,822 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:30,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:31,830 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:32,877 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:33,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:34,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:35,764 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:36,727 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:37,789 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:38,822 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:39,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:40,838 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:41,863 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:42,813 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:43,852 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:44,875 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:45,853 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:46,820 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:47,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:48,833 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:49,830 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:50,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:51,814 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:30:52,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [3:03:50<00:00,  1.12s/it][INFO|trainer.py:3860] 2025-02-12 18:31:01,025 >> Saving model checkpoint to ./checkpoint-8000
-{'eval_loss': 0.1835634410381317, 'eval_wer': 10.886229784051602, 'eval_runtime': 154.4338, 'eval_samples_per_second': 13.624, 'eval_steps_per_second': 0.855, 'epoch': 1.0}
-[INFO|configuration_utils.py:423] 2025-02-12 18:31:01,026 >> Configuration saved in ./checkpoint-8000/config.json
-[INFO|configuration_utils.py:906] 2025-02-12 18:31:01,027 >> Configuration saved in ./checkpoint-8000/generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 18:31:02,413 >> Model weights saved in ./checkpoint-8000/model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 18:31:02,414 >> Feature extractor saved in ./checkpoint-8000/preprocessor_config.json
-[INFO|feature_extraction_utils.py:437] 2025-02-12 18:31:05,951 >> Feature extractor saved in ./preprocessor_config.json
-[INFO|trainer.py:2621] 2025-02-12 18:31:05,974 >>
-
-Training completed. Do not forget to share your model on huggingface.co/models =)
-
-
-[INFO|trainer.py:2859] 2025-02-12 18:31:05,974 >> Loading best model from ./checkpoint-8000 (score: 10.886229784051602).
-[WARNING|trainer.py:2996] 2025-02-12 18:31:06,378 >> There were missing keys in the checkpoint model loaded: ['proj_out.weight'].
-100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [3:03:55<00:00,  1.38s/it]
-{'train_runtime': 11036.9074, 'train_samples_per_second': 23.195, 'train_steps_per_second': 0.725, 'train_loss': 0.17036041705310345, 'epoch': 1.0}
-[INFO|trainer.py:4641] 2025-02-12 18:31:06,419 >> Waiting for the current checkpoint push to be finished, this might take a couple of minutes.
-It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 18:31:10 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-[INFO|trainer.py:3860] 2025-02-12 18:32:59,287 >> Saving model checkpoint to ./
-[INFO|configuration_utils.py:423] 2025-02-12 18:32:59,288 >> Configuration saved in ./config.json
-[INFO|configuration_utils.py:906] 2025-02-12 18:32:59,290 >> Configuration saved in ./generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 18:33:01,543 >> Model weights saved in ./model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 18:33:01,544 >> Feature extractor saved in ./preprocessor_config.json
-[INFO|trainer.py:3860] 2025-02-12 18:33:01,545 >> Saving model checkpoint to ./
-[INFO|configuration_utils.py:423] 2025-02-12 18:33:01,546 >> Configuration saved in ./config.json
-[INFO|configuration_utils.py:906] 2025-02-12 18:33:01,547 >> Configuration saved in ./generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 18:33:03,594 >> Model weights saved in ./model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 18:33:03,596 >> Feature extractor saved in ./preprocessor_config.json
-[INFO|modelcard.py:449] 2025-02-12 18:33:03,826 >> Dropping the following result as it does not have all the necessary fields:
-{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 10.886229784051602}]}
-It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 18:33:07 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-run-lejyafmi.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.62M/4.62M [00:01<00:00, 3.10MB/s]
-***** train metrics *****
-  epoch                    =           1.0
-  total_flos               = 68804121093GF
-  train_loss               =        0.1704
-  train_runtime            =    3:03:56.90
-  train_samples_per_second =        23.195
-  train_steps_per_second   =         0.725
-02/12/2025 18:34:14 - INFO - __main__ - *** Evaluate ***
-[INFO|trainer.py:4176] 2025-02-12 18:34:14,390 >>
-***** Running Evaluation *****
-[INFO|trainer.py:4180] 2025-02-12 18:34:14,390 >>   Num examples: Unknown
-[INFO|trainer.py:4181] 2025-02-12 18:34:14,390 >>   Batch size = 16
-[INFO|trainer_utils.py:837] 2025-02-12 18:34:21,770 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:21,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:23,093 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:24,451 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:25,719 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:26,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:28,007 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:29,124 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:30,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:31,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:32,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:33,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:34,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:35,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:36,700 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:37,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:38,634 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:39,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:40,537 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:41,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:42,380 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:43,354 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:44,341 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:45,314 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:46,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:47,281 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:48,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:49,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:50,458 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:51,370 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:52,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:53,396 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:54,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:55,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:56,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:57,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:58,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:34:59,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:00,681 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:01,717 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:03,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:04,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:05,993 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:06,930 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:07,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:08,880 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:09,968 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:10,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:11,921 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:13,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:14,074 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:15,109 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:16,148 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:17,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:18,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:19,129 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:20,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:21,190 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:22,160 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:23,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:24,153 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:25,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:26,238 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:27,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:28,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:29,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:30,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:31,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:32,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:33,287 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:34,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:35,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:36,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:37,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:38,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:39,530 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:40,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:41,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:42,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:43,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:44,509 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:45,496 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:46,461 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:47,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:48,535 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:49,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:50,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:51,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:52,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:53,784 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:54,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:55,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:56,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:57,957 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:35:58,926 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:00,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:01,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:02,083 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:03,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:04,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:05,141 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:06,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:07,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:08,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:09,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:10,412 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:11,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:12,466 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:13,562 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:14,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:15,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:16,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:17,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:18,568 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:19,501 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:20,504 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:21,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:22,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:23,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:24,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:25,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:26,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:27,543 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:28,586 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:29,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:30,585 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:31,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:32,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:33,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:34,531 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:35,552 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:36,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-[INFO|generation_whisper.py:1844] 2025-02-12 18:36:37,437 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
-***** eval metrics *****
-  epoch                   =        1.0
-  eval_loss               =     0.1836
-  eval_runtime            = 0:02:31.38
-  eval_samples_per_second =     13.899
-  eval_steps_per_second   =      0.872
-  eval_wer                =    10.8862
-[INFO|trainer.py:3860] 2025-02-12 18:36:45,773 >> Saving model checkpoint to ./
-[INFO|configuration_utils.py:423] 2025-02-12 18:36:45,774 >> Configuration saved in ./config.json
-[INFO|configuration_utils.py:906] 2025-02-12 18:36:45,775 >> Configuration saved in ./generation_config.json
-[INFO|modeling_utils.py:3040] 2025-02-12 18:36:47,949 >> Model weights saved in ./model.safetensors
-[INFO|feature_extraction_utils.py:437] 2025-02-12 18:36:47,950 >> Feature extractor saved in ./preprocessor_config.json
-It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-02/12/2025 18:36:51 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
-  warnings.warn(
-run-lejyafmi.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69M/4.69M [00:01<00:00, 3.23MB/s]
diff --git a/wandb/run-20250212_152709-lejyafmi/files/requirements.txt b/wandb/run-20250212_152709-lejyafmi/files/requirements.txt
deleted file mode 100644
index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152709-lejyafmi/files/requirements.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-aiosignal==1.3.2
-Markdown==3.7
-more-itertools==10.6.0
-requests==2.32.3
-sentry-sdk==2.21.0
-torchaudio==2.6.0
-charset-normalizer==3.4.1
-docker-pycreds==0.4.0
-nvidia-cusolver-cu12==11.6.1.9
-PyYAML==6.0.2
-librosa==0.10.2.post1
-soxr==0.5.0.post1
-multiprocess==0.70.16
-setuptools==75.8.0
-nvidia-cufft-cu12==11.2.1.3
-joblib==1.4.2
-pytz==2025.1
-pip==24.0
-scikit-learn==1.6.1
-certifi==2025.1.31
-jiwer==3.1.0
-regex==2024.11.6
-annotated-types==0.7.0
-grpcio==1.70.0
-msgpack==1.1.0
-mpmath==1.3.0
-nvidia-cudnn-cu12==9.1.0.70
-soundfile==0.13.1
-dill==0.3.8
-nvidia-nvtx-cu12==12.4.127
-six==1.17.0
-nvidia-cuda-cupti-cu12==12.4.127
-pyarrow==19.0.0
-nvidia-nccl-cu12==2.21.5
-psutil==6.1.1
-decorator==5.1.1
-llvmlite==0.44.0
-frozenlist==1.5.0
-pydantic==2.10.6
-networkx==3.4.2
-idna==3.10
-wandb==0.19.6
-aiohttp==3.11.12
-RapidFuzz==3.12.1
-pandas==2.2.3
-python-dateutil==2.9.0.post0
-numpy==2.1.3
-tokenizers==0.21.0
-nvidia-cusparselt-cu12==0.6.2
-typing_extensions==4.12.2
-urllib3==2.3.0
-setproctitle==1.3.4
-tzdata==2025.1
-sympy==1.13.1
-pooch==1.8.2
-click==8.1.8
-pydantic_core==2.27.2
-MarkupSafe==3.0.2
-scipy==1.15.1
-accelerate==1.3.0
-tensorboard==2.19.0
-protobuf==5.29.3
-gitdb==4.0.12
-smmap==5.0.2
-absl-py==2.1.0
-tqdm==4.67.1
-yarl==1.18.3
-pycparser==2.22
-nvidia-cusparse-cu12==12.3.1.170
-attrs==25.1.0
-lazy_loader==0.4
-tensorboard-data-server==0.7.2
-threadpoolctl==3.5.0
-GitPython==3.1.44
-safetensors==0.5.2
-fsspec==2024.12.0
-nvidia-cuda-nvrtc-cu12==12.4.127
-filelock==3.17.0
-aiohappyeyeballs==2.4.6
-packaging==24.2
-datasets==3.2.1.dev0
-audioread==3.0.1
-propcache==0.2.1
-transformers==4.49.0.dev0
-nvidia-cuda-runtime-cu12==12.4.127
-cffi==1.17.1
-evaluate==0.4.3
-Werkzeug==3.1.3
-huggingface-hub==0.28.1
-Jinja2==3.1.5
-torch==2.6.0
-nvidia-curand-cu12==10.3.5.147
-xxhash==3.5.0
-platformdirs==4.3.6
-multidict==6.1.0
-nvidia-cublas-cu12==12.4.5.8
-nvidia-nvjitlink-cu12==12.4.127
-triton==3.2.0
-numba==0.61.0
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-typeguard==4.3.0
-more-itertools==10.3.0
-tomli==2.0.1
-autocommand==2.2.2
-zipp==3.19.2
-typing_extensions==4.12.2
-backports.tarfile==1.2.0
-inflect==7.3.1
-jaraco.text==3.12.1
-wheel==0.43.0
-packaging==24.2
-jaraco.collections==5.1.0
-jaraco.functools==4.0.1
-jaraco.context==5.3.0
diff --git a/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json b/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json
deleted file mode 100644
index 15c28b0022a20f8df79ea54c97b8c208b5b9a6cb..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json
+++ /dev/null
@@ -1,86 +0,0 @@
-{
-  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
-  "python":  "CPython 3.12.3",
-  "startedAt":  "2025-02-12T15:27:10.113915Z",
-  "args":  [
-    "--model_name_or_path=openai/whisper-small",
-    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
-    "--language=basque",
-    "--train_split_name=train",
-    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
-    "--model_index_name=Whisper Small Basque",
-    "--max_steps=8000",
-    "--output_dir=./",
-    "--per_device_train_batch_size=32",
-    "--per_device_eval_batch_size=16",
-    "--gradient_accumulation_steps=1",
-    "--logging_steps=25",
-    "--learning_rate=1e-5",
-    "--warmup_steps=500",
-    "--evaluation_strategy=steps",
-    "--eval_steps=1000",
-    "--save_strategy=steps",
-    "--save_steps=1000",
-    "--generation_max_length=225",
-    "--length_column_name=input_length",
-    "--max_duration_in_seconds=30",
-    "--audio_column_name=audio",
-    "--text_column_name=sentence",
-    "--freeze_feature_encoder=False",
-    "--report_to=tensorboard",
-    "--metric_for_best_model=wer",
-    "--greater_is_better=False",
-    "--load_best_model_at_end",
-    "--gradient_checkpointing",
-    "--fp16",
-    "--overwrite_output_dir",
-    "--do_train",
-    "--do_eval",
-    "--predict_with_generate",
-    "--do_normalize_eval",
-    "--streaming",
-    "--use_auth_token",
-    "--push_to_hub",
-    "--report_to",
-    "wandb",
-    "--run_name",
-    "whisper-small-eu"
-  ],
-  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
-  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
-  "git":  {
-    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
-    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
-  },
-  "email":  "xezpeleta@gmail.com",
-  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
-  "host":  "tknika",
-  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
-  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
-  "cpu_count":  8,
-  "cpu_count_logical":  8,
-  "gpu":  "NVIDIA L40-48Q",
-  "gpu_count":  1,
-  "disk":  {
-    "/":  {
-      "total":  "525987168256",
-      "used":  "315242278912"
-    }
-  },
-  "memory":  {
-    "total":  "33654022144"
-  },
-  "cpu":  {
-    "count":  8,
-    "countLogical":  8
-  },
-  "gpu_nvidia":  [
-    {
-      "name":  "NVIDIA L40-48Q",
-      "memoryTotal":  "51539607552",
-      "cudaCores":  18176,
-      "architecture":  "Ada"
-    }
-  ],
-  "cudaVersion":  "12.4"
-}
\ No newline at end of file
diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log b/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log
deleted file mode 100644
index b6d594cdf89ccbf6c593c3f1737e25809a758e18..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log
+++ /dev/null
@@ -1,6 +0,0 @@
-{"time":"2025-02-12T15:27:09.931528058Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqvyk3k9m/port-243546.txt","pid":243546,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
-{"time":"2025-02-12T15:27:09.940481802Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":243546}
-{"time":"2025-02-12T15:27:09.940461772Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38449,"Zone":""}}
-{"time":"2025-02-12T15:27:10.107162928Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:41928"}
-{"time":"2025-02-12T15:27:10.115855515Z","level":"INFO","msg":"handleInformInit: received","streamId":"lejyafmi","id":"127.0.0.1:41928"}
-{"time":"2025-02-12T15:27:10.220025051Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"lejyafmi","id":"127.0.0.1:41928"}
diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log b/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
deleted file mode 100644
index 3e76e3e81a1059aef65345804a4f269bcd9183ad..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
+++ /dev/null
@@ -1,7 +0,0 @@
-{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
-{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
-{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug.log b/wandb/run-20250212_152709-lejyafmi/logs/debug.log
deleted file mode 100644
index d6ccd57adcf2bbe370a51abbc03e1e4a2718eb85..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152709-lejyafmi/logs/debug.log
+++ /dev/null
@@ -1,25 +0,0 @@
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():756] calling init triggers
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
-config: {'_wandb': {}}
-2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():789] starting backend
-2025-02-12 15:27:10,107 INFO    MainThread:243546 [wandb_init.py:init():793] sending inform_init request
-2025-02-12 15:27:10,112 INFO    MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-02-12 15:27:10,113 INFO    MainThread:243546 [wandb_init.py:init():808] backend started and connected
-2025-02-12 15:27:10,115 INFO    MainThread:243546 [wandb_init.py:init():901] updated telemetry
-2025-02-12 15:27:10,122 INFO    MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
-2025-02-12 15:27:10,584 INFO    MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
-2025-02-12 15:27:10,691 INFO    MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
-2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
-2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
-2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
-2025-02-12 15:27:10,694 INFO    MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
-2025-02-12 15:27:10,698 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
-2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
-2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
diff --git a/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb b/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb
deleted file mode 100644
index 69feb1920a8b41f53fa3307d140365bf10af15fc..0000000000000000000000000000000000000000
--- a/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:218ae98ab28234be327e4ea9293f7b5d13580cf3d80509614063d5a55716991b
-size 4685824