diff --git a/.gitignore b/.gitignore index 0d763d77e0bd715a70781df9cbd287e6819c63ed..98c8233d456d2b5b8468791fb8ac8e89204c9c49 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ checkpoint-*/ nohup.out .venv/ +wandb/ diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log deleted file mode 100644 index 3e76e3e81a1059aef65345804a4f269bcd9183ad..0000000000000000000000000000000000000000 --- a/wandb/debug-internal.log +++ /dev/null @@ -1,7 +0,0 @@ -{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"} -{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"} diff --git a/wandb/debug.log b/wandb/debug.log deleted file mode 100644 index d6ccd57adcf2bbe370a51abbc03e1e4a2718eb85..0000000000000000000000000000000000000000 --- a/wandb/debug.log +++ /dev/null @@ -1,25 +0,0 @@ -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546 -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():756] calling init triggers -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():789] starting backend -2025-02-12 15:27:10,107 INFO MainThread:243546 [wandb_init.py:init():793] sending inform_init request -2025-02-12 15:27:10,112 INFO MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 15:27:10,113 INFO MainThread:243546 [wandb_init.py:init():808] backend started and connected -2025-02-12 15:27:10,115 INFO MainThread:243546 [wandb_init.py:init():901] updated telemetry -2025-02-12 15:27:10,122 INFO MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 15:27:10,584 INFO MainThread:243546 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 15:27:10,691 INFO MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 15:27:10,694 INFO MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 15:27:10,698 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None diff --git a/wandb/run-20250212_121751-d4i88lzt/files/config.yaml b/wandb/run-20250212_121751-d4i88lzt/files/config.yaml deleted file mode 100644 index 043453cafc3d9969981ef61b7a8be2e92734597b..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_12-17-27_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_121751-d4i88lzt/files/output.log b/wandb/run-20250212_121751-d4i88lzt/files/output.log deleted file mode 100644 index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt b/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json b/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json deleted file mode 100644 index 1c4f87be68cf8cc40c16f58b87fc77156484b279..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T12:17:51.527114Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "297346564096" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json b/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log deleted file mode 100644 index d6af67f7efa449508164027a6273196ce78339b0..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392} -{"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}} -{"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"} -{"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log deleted file mode 100644 index 26894375e49df56758efcfe21e6d3c1198d1f1c3..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"} -{"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"} -{"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"} -{"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"} -{"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"} -{"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"} -{"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"} -{"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"} -{"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"} -{"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"} -{"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"} diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug.log deleted file mode 100644 index 9d9b0f45e81af07737809add0aa564e5a82d4e9b..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_121751-d4i88lzt/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392 -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log -2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:init():756] calling init triggers -2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():789] starting backend -2025-02-12 12:17:51,521 INFO MainThread:223392 [wandb_init.py:init():793] sending inform_init request -2025-02-12 12:17:51,526 INFO MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 12:17:51,526 INFO MainThread:223392 [wandb_init.py:init():808] backend started and connected -2025-02-12 12:17:51,528 INFO MainThread:223392 [wandb_init.py:init():901] updated telemetry -2025-02-12 12:17:51,535 INFO MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 12:17:51,944 INFO MainThread:223392 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 12:17:52,051 INFO MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 12:17:52,052 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 12:17:52,054 INFO MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 12:17:52,055 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb b/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb deleted file mode 100644 index 22a915c2f93dba32f6fb4cb19cdad8d41103ad8f..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb and /dev/null differ diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml b/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml deleted file mode 100644 index 0190eccc1f197e04acf36d5f4461aa7e993e6582..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_12-26-11_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/output.log b/wandb/run-20250212_122637-v3d3ouvn/files/output.log deleted file mode 100644 index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt b/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json deleted file mode 100644 index 09e834199f3ff4987252a9c26cf0f4e0a17ac89c..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T12:26:37.277902Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "297346666496" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log deleted file mode 100644 index a2c451b0fdc549443d1d6b56f3138b2f0778d2da..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110} -{"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}} -{"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"} -{"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log deleted file mode 100644 index 836534c70d6b5a016b47367347a114c8262f9db4..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"} -{"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"} -{"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"} diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log deleted file mode 100644 index 651a7c89fd0b1c415f299466295634e0fb5f97ae..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110 -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():756] calling init triggers -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():789] starting backend -2025-02-12 12:26:37,272 INFO MainThread:224110 [wandb_init.py:init():793] sending inform_init request -2025-02-12 12:26:37,277 INFO MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 12:26:37,277 INFO MainThread:224110 [wandb_init.py:init():808] backend started and connected -2025-02-12 12:26:37,279 INFO MainThread:224110 [wandb_init.py:init():901] updated telemetry -2025-02-12 12:26:37,285 INFO MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 12:26:37,653 INFO MainThread:224110 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 12:26:37,764 INFO MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 12:26:37,766 INFO MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 12:26:37,767 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb b/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb deleted file mode 100644 index ec09d574bec753b9c13409f7b715d91402a72733..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb and /dev/null differ diff --git a/wandb/run-20250212_122854-4m048f5s/files/config.yaml b/wandb/run-20250212_122854-4m048f5s/files/config.yaml deleted file mode 100644 index cd2427648bdf8f30b7a5b1a74e995772423700bd..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_12-28-29_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_122854-4m048f5s/files/output.log b/wandb/run-20250212_122854-4m048f5s/files/output.log deleted file mode 100644 index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_122854-4m048f5s/files/requirements.txt b/wandb/run-20250212_122854-4m048f5s/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json b/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json deleted file mode 100644 index f63869dc081394dcbd07bd84335642df549da86e..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T12:28:54.528397Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "297346756608" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json b/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log b/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log deleted file mode 100644 index 0a340626ec035668304ac5b99a523d0e9b994b99..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528} -{"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}} -{"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"} -{"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log b/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log deleted file mode 100644 index 12bd0e04a87c9e69aaa4351910eaad3205df4abf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"} -{"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"} -{"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"} -{"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"} -{"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"} -{"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"} -{"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"} -{"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"} -{"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"} -{"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"} -{"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"} diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug.log b/wandb/run-20250212_122854-4m048f5s/logs/debug.log deleted file mode 100644 index 54eaf255ca03d83dfe297f954d06f7be79378056..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_122854-4m048f5s/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528 -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():756] calling init triggers -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():789] starting backend -2025-02-12 12:28:54,521 INFO MainThread:224528 [wandb_init.py:init():793] sending inform_init request -2025-02-12 12:28:54,527 INFO MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 12:28:54,528 INFO MainThread:224528 [wandb_init.py:init():808] backend started and connected -2025-02-12 12:28:54,530 INFO MainThread:224528 [wandb_init.py:init():901] updated telemetry -2025-02-12 12:28:54,537 INFO MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 12:28:54,883 INFO MainThread:224528 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 12:28:54,988 INFO MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 12:28:54,990 INFO MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 12:28:54,991 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb b/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb deleted file mode 100644 index d6e2b9a990f433fbef578c9ebdfb5b9a71ba6c24..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb and /dev/null differ diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml b/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml deleted file mode 100644 index fda1ff6a94e5d574407cad68313478699d3b809c..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_12-51-48_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/output.log b/wandb/run-20250212_125202-c6xjc1gs/files/output.log deleted file mode 100644 index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt b/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json deleted file mode 100644 index de67d54e80183d40bb763b73a71aa662e9c26e2d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T12:52:03.105234Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=test_parl", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313777016832" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log deleted file mode 100644 index b2a91d838478809786e569e4b108333296f1c838..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112} -{"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}} -{"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"} -{"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log deleted file mode 100644 index e92e3fbe32b1a1557ac2822b511c40cd3bec6edc..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"} -{"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"} -{"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"} diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log deleted file mode 100644 index f7f6579dfe6ece92d42e8341a45478b9a1a5d112..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 12:52:02,886 INFO MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112 -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():756] calling init triggers -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():789] starting backend -2025-02-12 12:52:03,097 INFO MainThread:226112 [wandb_init.py:init():793] sending inform_init request -2025-02-12 12:52:03,104 INFO MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 12:52:03,104 INFO MainThread:226112 [wandb_init.py:init():808] backend started and connected -2025-02-12 12:52:03,107 INFO MainThread:226112 [wandb_init.py:init():901] updated telemetry -2025-02-12 12:52:03,114 INFO MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 12:52:03,483 INFO MainThread:226112 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 12:52:03,568 INFO MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 12:52:03,569 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb b/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb deleted file mode 100644 index 2f75b3e73a217b18b87ae8966776fa45be09c53d..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb and /dev/null differ diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml b/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml deleted file mode 100644 index 07f41facb74690088d1d0b07a0c5bd2e9cb291f4..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_12-58-59_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/output.log b/wandb/run-20250212_125924-xhsgsxqq/files/output.log deleted file mode 100644 index 0e757b553e4758f4d7cd9dbeb4450a0b166b1880..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt b/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json deleted file mode 100644 index 6df8aff999de9e30b89c5c5cc1a7450cf021e67a..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T12:59:24.816046Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313777115136" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log deleted file mode 100644 index b7598f14ab3c8cbb4c8d36057ac70a206f4ddfed..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T12:59:24.63359638Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa90v2n0h/port-226591.txt","pid":226591,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T12:59:24.673351851Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226591} -{"time":"2025-02-12T12:59:24.673324591Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41203,"Zone":""}} -{"time":"2025-02-12T12:59:24.809390061Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:24.819517706Z","level":"INFO","msg":"handleInformInit: received","streamId":"xhsgsxqq","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:24.923364896Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xhsgsxqq","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:25.341856618Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:25.341962867Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T12:59:25.341964847Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:25.342139496Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:25.569637185Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41203->127.0.0.1:53388: use of closed network connection","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:26.643739482Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:26.643783881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:53388"} -{"time":"2025-02-12T12:59:26.643808411Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log deleted file mode 100644 index 26b5602b5006fa339037330ef9890aa5ee369829..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T12:59:24.819689255Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log"} -{"time":"2025-02-12T12:59:24.923303396Z","level":"INFO","msg":"created new stream","id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:24.923354596Z","level":"INFO","msg":"stream: started","id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:24.923472545Z","level":"INFO","msg":"writer: Do: started","stream_id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:24.923494475Z","level":"INFO","msg":"handler: started","stream_id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:24.923560215Z","level":"INFO","msg":"sender: started","stream_id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:25.192419842Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T12:59:25.341944447Z","level":"INFO","msg":"stream: closing","id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:25.341971537Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T12:59:25.342739283Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T12:59:26.408412135Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T12:59:26.643441283Z","level":"INFO","msg":"handler: closed","stream_id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:26.643483513Z","level":"INFO","msg":"writer: Close: closed","stream_id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:26.643525403Z","level":"INFO","msg":"sender: closed","stream_id":"xhsgsxqq"} -{"time":"2025-02-12T12:59:26.643566923Z","level":"INFO","msg":"stream: closed","id":"xhsgsxqq"} diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log deleted file mode 100644 index bc26d42fc8a709f9cfcfa6a7dbdbbc82be8f50e9..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Configure stats pid to 226591 -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():756] calling init triggers -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 12:59:24,598 INFO MainThread:226591 [wandb_init.py:init():789] starting backend -2025-02-12 12:59:24,809 INFO MainThread:226591 [wandb_init.py:init():793] sending inform_init request -2025-02-12 12:59:24,815 INFO MainThread:226591 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 12:59:24,815 INFO MainThread:226591 [wandb_init.py:init():808] backend started and connected -2025-02-12 12:59:24,818 INFO MainThread:226591 [wandb_init.py:init():901] updated telemetry -2025-02-12 12:59:24,823 INFO MainThread:226591 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 12:59:25,189 INFO MainThread:226591 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 12:59:25,301 INFO MainThread:226591 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 12:59:25,303 INFO MainThread:226591 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 12:59:25,304 INFO MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-58-59_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 12:59:25,306 INFO MainThread:226591 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 12:59:25,306 INFO MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 12:59:25,342 WARNING MsgRouterThr:226591 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb b/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb deleted file mode 100644 index 1b53de295e68266af60d83bca74b9be0894b34e8..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb and /dev/null differ diff --git a/wandb/run-20250212_130533-zeu6vay4/files/config.yaml b/wandb/run-20250212_130533-zeu6vay4/files/config.yaml deleted file mode 100644 index 420595717bd915426e2e2cab93fdb5e08bd589cb..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_13-05-10_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_130533-zeu6vay4/files/output.log b/wandb/run-20250212_130533-zeu6vay4/files/output.log deleted file mode 100644 index 769cfff2bf9a545350da2d3c1f0974308181aefb..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 582, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt b/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json b/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json deleted file mode 100644 index aebfc577d2e88050d8085f734bc8f66f785f3040..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T13:05:34.019960Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313777221632" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json b/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log deleted file mode 100644 index 87fbf64e8666318f6e5b43dae2d01a140fc24ead..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T13:05:33.837553705Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvxrga7tq/port-227738.txt","pid":227738,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T13:05:33.841824082Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":227738} -{"time":"2025-02-12T13:05:33.841805772Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41271,"Zone":""}} -{"time":"2025-02-12T13:05:34.013327792Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:34.022772969Z","level":"INFO","msg":"handleInformInit: received","streamId":"zeu6vay4","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:34.128884037Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"zeu6vay4","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:34.567948471Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:34.568036341Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:34.568049191Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T13:05:34.568283769Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:34.807984453Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41271->127.0.0.1:36972: use of closed network connection","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:35.861897508Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:35.861922138Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:36972"} -{"time":"2025-02-12T13:05:35.861946917Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log deleted file mode 100644 index d3fe88aa2e519427abbec9fea34b1053792e9916..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T13:05:34.023147337Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log"} -{"time":"2025-02-12T13:05:34.128801887Z","level":"INFO","msg":"created new stream","id":"zeu6vay4"} -{"time":"2025-02-12T13:05:34.128873947Z","level":"INFO","msg":"stream: started","id":"zeu6vay4"} -{"time":"2025-02-12T13:05:34.128926027Z","level":"INFO","msg":"writer: Do: started","stream_id":"zeu6vay4"} -{"time":"2025-02-12T13:05:34.128988376Z","level":"INFO","msg":"sender: started","stream_id":"zeu6vay4"} -{"time":"2025-02-12T13:05:34.129040036Z","level":"INFO","msg":"handler: started","stream_id":"zeu6vay4"} -{"time":"2025-02-12T13:05:34.419229803Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T13:05:34.568056211Z","level":"INFO","msg":"stream: closing","id":"zeu6vay4"} -{"time":"2025-02-12T13:05:34.56812972Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T13:05:34.568907056Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T13:05:35.64406902Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T13:05:35.861616759Z","level":"INFO","msg":"handler: closed","stream_id":"zeu6vay4"} -{"time":"2025-02-12T13:05:35.861676279Z","level":"INFO","msg":"writer: Close: closed","stream_id":"zeu6vay4"} -{"time":"2025-02-12T13:05:35.861688929Z","level":"INFO","msg":"sender: closed","stream_id":"zeu6vay4"} -{"time":"2025-02-12T13:05:35.861776488Z","level":"INFO","msg":"stream: closed","id":"zeu6vay4"} diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug.log deleted file mode 100644 index 9b2ee9877baf73f9ef90a46ae8c6af7f3f198927..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_130533-zeu6vay4/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 13:05:33,802 INFO MainThread:227738 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 13:05:33,802 INFO MainThread:227738 [wandb_setup.py:_flush():68] Configure stats pid to 227738 -2025-02-12 13:05:33,802 INFO MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 13:05:33,802 INFO MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 13:05:33,802 INFO MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 13:05:33,802 INFO MainThread:227738 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug.log -2025-02-12 13:05:33,803 INFO MainThread:227738 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log -2025-02-12 13:05:33,803 INFO MainThread:227738 [wandb_init.py:init():756] calling init triggers -2025-02-12 13:05:33,803 INFO MainThread:227738 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 13:05:33,803 INFO MainThread:227738 [wandb_init.py:init():789] starting backend -2025-02-12 13:05:34,013 INFO MainThread:227738 [wandb_init.py:init():793] sending inform_init request -2025-02-12 13:05:34,019 INFO MainThread:227738 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 13:05:34,019 INFO MainThread:227738 [wandb_init.py:init():808] backend started and connected -2025-02-12 13:05:34,021 INFO MainThread:227738 [wandb_init.py:init():901] updated telemetry -2025-02-12 13:05:34,029 INFO MainThread:227738 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 13:05:34,416 INFO MainThread:227738 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 13:05:34,527 INFO MainThread:227738 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 13:05:34,527 INFO MainThread:227738 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 13:05:34,527 INFO MainThread:227738 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 13:05:34,527 INFO MainThread:227738 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 13:05:34,529 INFO MainThread:227738 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 13:05:34,530 INFO MainThread:227738 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-05-10_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 13:05:34,533 INFO MainThread:227738 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 13:05:34,533 INFO MainThread:227738 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 13:05:34,568 WARNING MsgRouterThr:227738 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb b/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb deleted file mode 100644 index 7901333681792ad6d6f54878666ba62ed5e18337..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb and /dev/null differ diff --git a/wandb/run-20250212_131820-cnos968u/files/config.yaml b/wandb/run-20250212_131820-cnos968u/files/config.yaml deleted file mode 100644 index 95906302a5990edf8b7625c376bbe1acd0b580a0..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_13-17-51_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_131820-cnos968u/files/output.log b/wandb/run-20250212_131820-cnos968u/files/output.log deleted file mode 100644 index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_131820-cnos968u/files/requirements.txt b/wandb/run-20250212_131820-cnos968u/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json b/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json deleted file mode 100644 index 90498074d4382bf401e4eae2589b05d71bb571fe..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T13:18:20.315500Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313777364992" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json b/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug-core.log b/wandb/run-20250212_131820-cnos968u/logs/debug-core.log deleted file mode 100644 index c6146a7673a983023bddad8f6b42c8bee05d89af..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T13:18:20.13270486Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoihqtf7_/port-228562.txt","pid":228562,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T13:18:20.138326066Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":228562} -{"time":"2025-02-12T13:18:20.138290606Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43223,"Zone":""}} -{"time":"2025-02-12T13:18:20.311508128Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:20.318120198Z","level":"INFO","msg":"handleInformInit: received","streamId":"cnos968u","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:20.423169259Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"cnos968u","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:20.829667746Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:20.829731656Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:20.829793855Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T13:18:20.829860655Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:20.996850774Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:43223->127.0.0.1:59824: use of closed network connection","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:22.315588426Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:22.315614045Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:59824"} -{"time":"2025-02-12T13:18:22.315638895Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log b/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log deleted file mode 100644 index 8906a5937e9d4dc75937d8ef3efaf8e6c19bc30f..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T13:18:20.318385167Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug-core.log"} -{"time":"2025-02-12T13:18:20.423108789Z","level":"INFO","msg":"created new stream","id":"cnos968u"} -{"time":"2025-02-12T13:18:20.423149939Z","level":"INFO","msg":"stream: started","id":"cnos968u"} -{"time":"2025-02-12T13:18:20.423187669Z","level":"INFO","msg":"writer: Do: started","stream_id":"cnos968u"} -{"time":"2025-02-12T13:18:20.423274918Z","level":"INFO","msg":"sender: started","stream_id":"cnos968u"} -{"time":"2025-02-12T13:18:20.423278528Z","level":"INFO","msg":"handler: started","stream_id":"cnos968u"} -{"time":"2025-02-12T13:18:20.691599029Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T13:18:20.829801165Z","level":"INFO","msg":"stream: closing","id":"cnos968u"} -{"time":"2025-02-12T13:18:20.829838505Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T13:18:20.830448261Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T13:18:22.102558325Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T13:18:22.315316527Z","level":"INFO","msg":"handler: closed","stream_id":"cnos968u"} -{"time":"2025-02-12T13:18:22.315380557Z","level":"INFO","msg":"writer: Close: closed","stream_id":"cnos968u"} -{"time":"2025-02-12T13:18:22.315415207Z","level":"INFO","msg":"sender: closed","stream_id":"cnos968u"} -{"time":"2025-02-12T13:18:22.315473316Z","level":"INFO","msg":"stream: closed","id":"cnos968u"} diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug.log b/wandb/run-20250212_131820-cnos968u/logs/debug.log deleted file mode 100644 index 39be2fccb1e90ae66cd4e4477210e91974f70b36..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_131820-cnos968u/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_setup.py:_flush():68] Configure stats pid to 228562 -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug.log -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_init.py:init():756] calling init triggers -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 13:18:20,106 INFO MainThread:228562 [wandb_init.py:init():789] starting backend -2025-02-12 13:18:20,311 INFO MainThread:228562 [wandb_init.py:init():793] sending inform_init request -2025-02-12 13:18:20,315 INFO MainThread:228562 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 13:18:20,315 INFO MainThread:228562 [wandb_init.py:init():808] backend started and connected -2025-02-12 13:18:20,316 INFO MainThread:228562 [wandb_init.py:init():901] updated telemetry -2025-02-12 13:18:20,320 INFO MainThread:228562 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 13:18:20,688 INFO MainThread:228562 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 13:18:20,789 INFO MainThread:228562 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 13:18:20,789 INFO MainThread:228562 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 13:18:20,789 INFO MainThread:228562 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 13:18:20,789 INFO MainThread:228562 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 13:18:20,791 INFO MainThread:228562 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 13:18:20,792 INFO MainThread:228562 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-17-51_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 13:18:20,794 INFO MainThread:228562 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 13:18:20,794 INFO MainThread:228562 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 13:18:20,830 WARNING MsgRouterThr:228562 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb b/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb deleted file mode 100644 index 8fd59480179eef903cc9efb265ca12fb88f77992..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb and /dev/null differ diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml b/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml deleted file mode 100644 index cf75123251a142c3bb6a48006c0bdfb4679249b3..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_13-49-16_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/output.log b/wandb/run-20250212_134942-5ywh9vkd/files/output.log deleted file mode 100644 index b9e4b6c313316dc48b344f88dc5473c4de1f1088..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 580, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt b/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json deleted file mode 100644 index 3da9f22f6e027ebd1af1c6145cfa29e6643c9e3d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T13:49:42.549340Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313777541120" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log deleted file mode 100644 index 14ea2e58769ad59628cf9dcf7f7d3c3cd69e16ea..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T13:49:42.368539349Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcs75h_7n/port-230104.txt","pid":230104,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T13:49:42.376031144Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230104} -{"time":"2025-02-12T13:49:42.375994744Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46317,"Zone":""}} -{"time":"2025-02-12T13:49:42.545856407Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:42.550347793Z","level":"INFO","msg":"handleInformInit: received","streamId":"5ywh9vkd","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:42.653585761Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"5ywh9vkd","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:43.065873804Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:43.065944244Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:43.065999603Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T13:49:43.066118552Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:43.307941987Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:46317->127.0.0.1:58840: use of closed network connection","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:44.336718599Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:44.336762259Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:58840"} -{"time":"2025-02-12T13:49:44.336780169Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log deleted file mode 100644 index 958ba48035f615c72447b59df82dad6ed3e33a35..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T13:49:42.550471882Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log"} -{"time":"2025-02-12T13:49:42.653534801Z","level":"INFO","msg":"created new stream","id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:42.653576741Z","level":"INFO","msg":"stream: started","id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:42.653711879Z","level":"INFO","msg":"handler: started","stream_id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:42.653689329Z","level":"INFO","msg":"writer: Do: started","stream_id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:42.653750879Z","level":"INFO","msg":"sender: started","stream_id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:42.915738751Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T13:49:43.065978643Z","level":"INFO","msg":"stream: closing","id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:43.066102462Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T13:49:43.066991893Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T13:49:44.121293278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T13:49:44.336369943Z","level":"INFO","msg":"handler: closed","stream_id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:44.336433982Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:44.336490092Z","level":"INFO","msg":"sender: closed","stream_id":"5ywh9vkd"} -{"time":"2025-02-12T13:49:44.336537621Z","level":"INFO","msg":"stream: closed","id":"5ywh9vkd"} diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log deleted file mode 100644 index daf9c305c6a9112d08089604436fe99d07693e41..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 13:49:42,336 INFO MainThread:230104 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 13:49:42,336 INFO MainThread:230104 [wandb_setup.py:_flush():68] Configure stats pid to 230104 -2025-02-12 13:49:42,336 INFO MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 13:49:42,336 INFO MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 13:49:42,336 INFO MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 13:49:42,336 INFO MainThread:230104 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log -2025-02-12 13:49:42,337 INFO MainThread:230104 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log -2025-02-12 13:49:42,337 INFO MainThread:230104 [wandb_init.py:init():756] calling init triggers -2025-02-12 13:49:42,337 INFO MainThread:230104 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 13:49:42,337 INFO MainThread:230104 [wandb_init.py:init():789] starting backend -2025-02-12 13:49:42,545 INFO MainThread:230104 [wandb_init.py:init():793] sending inform_init request -2025-02-12 13:49:42,548 INFO MainThread:230104 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 13:49:42,549 INFO MainThread:230104 [wandb_init.py:init():808] backend started and connected -2025-02-12 13:49:42,550 INFO MainThread:230104 [wandb_init.py:init():901] updated telemetry -2025-02-12 13:49:42,553 INFO MainThread:230104 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 13:49:42,912 INFO MainThread:230104 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 13:49:43,026 INFO MainThread:230104 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 13:49:43,026 INFO MainThread:230104 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 13:49:43,026 INFO MainThread:230104 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 13:49:43,026 INFO MainThread:230104 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 13:49:43,027 INFO MainThread:230104 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 13:49:43,028 INFO MainThread:230104 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-49-16_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 13:49:43,031 INFO MainThread:230104 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 13:49:43,031 INFO MainThread:230104 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 13:49:43,066 WARNING MsgRouterThr:230104 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb b/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb deleted file mode 100644 index d95543eeebd7c7ded69dd770ef822456843f59fd..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb and /dev/null differ diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml b/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml deleted file mode 100644 index 1f1ca62c76dba92301a01886cb09958ac5a1f9f8..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_13-51-27_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/output.log b/wandb/run-20250212_135151-5m7b3lhr/files/output.log deleted file mode 100644 index da5340cfe9cc02a517c9de0cdb66c052b387beab..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/files/output.log +++ /dev/null @@ -1,23 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 580, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 556, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt b/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json deleted file mode 100644 index 3e914497e6936a06c61ef66438821f8fa5b99f36..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T13:51:51.496687Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313777639424" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log deleted file mode 100644 index 16c1fd0e3b169fe267030e00e4d3679cb9c42a52..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T13:51:51.3124141Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcyi8bfs3/port-230520.txt","pid":230520,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T13:51:51.317857259Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230520} -{"time":"2025-02-12T13:51:51.317819419Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41833,"Zone":""}} -{"time":"2025-02-12T13:51:51.490296524Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:51.49928487Z","level":"INFO","msg":"handleInformInit: received","streamId":"5m7b3lhr","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:51.60503634Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"5m7b3lhr","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:52.040899031Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:52.0409399Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:52.04094382Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T13:51:52.04102198Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:52.213258576Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41833->127.0.0.1:51804: use of closed network connection","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:53.364540267Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:53.364555197Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51804"} -{"time":"2025-02-12T13:51:53.364566507Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log deleted file mode 100644 index b9aa4b44922e1558b02d06d29a822d654f48137f..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T13:51:51.499619967Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log"} -{"time":"2025-02-12T13:51:51.604967821Z","level":"INFO","msg":"created new stream","id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:51.60502639Z","level":"INFO","msg":"stream: started","id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:51.605130129Z","level":"INFO","msg":"writer: Do: started","stream_id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:51.605216728Z","level":"INFO","msg":"handler: started","stream_id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:51.605315277Z","level":"INFO","msg":"sender: started","stream_id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:51.888376389Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T13:51:52.0410007Z","level":"INFO","msg":"stream: closing","id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:52.041038759Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T13:51:52.041736053Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T13:51:53.021189887Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T13:51:53.364332489Z","level":"INFO","msg":"handler: closed","stream_id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:53.364389748Z","level":"INFO","msg":"sender: closed","stream_id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:53.364389018Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5m7b3lhr"} -{"time":"2025-02-12T13:51:53.364475307Z","level":"INFO","msg":"stream: closed","id":"5m7b3lhr"} diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log deleted file mode 100644 index a7e7ed2dbfef267db590321ad3622db7e9cf3621..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 13:51:51,279 INFO MainThread:230520 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 13:51:51,279 INFO MainThread:230520 [wandb_setup.py:_flush():68] Configure stats pid to 230520 -2025-02-12 13:51:51,279 INFO MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 13:51:51,279 INFO MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 13:51:51,279 INFO MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 13:51:51,279 INFO MainThread:230520 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log -2025-02-12 13:51:51,280 INFO MainThread:230520 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log -2025-02-12 13:51:51,280 INFO MainThread:230520 [wandb_init.py:init():756] calling init triggers -2025-02-12 13:51:51,280 INFO MainThread:230520 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 13:51:51,280 INFO MainThread:230520 [wandb_init.py:init():789] starting backend -2025-02-12 13:51:51,490 INFO MainThread:230520 [wandb_init.py:init():793] sending inform_init request -2025-02-12 13:51:51,496 INFO MainThread:230520 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 13:51:51,496 INFO MainThread:230520 [wandb_init.py:init():808] backend started and connected -2025-02-12 13:51:51,498 INFO MainThread:230520 [wandb_init.py:init():901] updated telemetry -2025-02-12 13:51:51,505 INFO MainThread:230520 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 13:51:51,885 INFO MainThread:230520 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 13:51:52,001 INFO MainThread:230520 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 13:51:52,001 INFO MainThread:230520 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 13:51:52,001 INFO MainThread:230520 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 13:51:52,001 INFO MainThread:230520 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 13:51:52,003 INFO MainThread:230520 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 13:51:52,004 INFO MainThread:230520 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-51-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 13:51:52,006 INFO MainThread:230520 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 13:51:52,006 INFO MainThread:230520 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 13:51:52,041 WARNING MsgRouterThr:230520 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb b/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb deleted file mode 100644 index 90536287bc7c440e01774fadf1beb280c4a85b3e..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb and /dev/null differ diff --git a/wandb/run-20250212_135331-x29lgb1q/files/config.yaml b/wandb/run-20250212_135331-x29lgb1q/files/config.yaml deleted file mode 100644 index 140e3770741b2dbcca969ef317059023c5bc500e..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135331-x29lgb1q/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_13-53-04_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_135331-x29lgb1q/files/output.log b/wandb/run-20250212_135331-x29lgb1q/files/output.log deleted file mode 100644 index 93558c333a13dd8d66af57c288bf367d12bd83cc..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135331-x29lgb1q/files/output.log +++ /dev/null @@ -1,2 +0,0 @@ - 0%| | 0/8000 [00:00127.0.0.1:43984: use of closed network connection","id":"127.0.0.1:43984"} -{"time":"2025-02-12T13:53:33.114823214Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:43984"} -{"time":"2025-02-12T13:53:33.114850744Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:43984"} -{"time":"2025-02-12T13:53:33.114865414Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log deleted file mode 100644 index dd1c8e4964b1bf449f85380ffb5d3a89ca81e194..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T13:53:31.257882961Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log"} -{"time":"2025-02-12T13:53:31.363436433Z","level":"INFO","msg":"created new stream","id":"x29lgb1q"} -{"time":"2025-02-12T13:53:31.363519102Z","level":"INFO","msg":"stream: started","id":"x29lgb1q"} -{"time":"2025-02-12T13:53:31.363646481Z","level":"INFO","msg":"writer: Do: started","stream_id":"x29lgb1q"} -{"time":"2025-02-12T13:53:31.36374892Z","level":"INFO","msg":"sender: started","stream_id":"x29lgb1q"} -{"time":"2025-02-12T13:53:31.363851899Z","level":"INFO","msg":"handler: started","stream_id":"x29lgb1q"} -{"time":"2025-02-12T13:53:31.741270453Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T13:53:31.886903155Z","level":"INFO","msg":"stream: closing","id":"x29lgb1q"} -{"time":"2025-02-12T13:53:31.886939864Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T13:53:31.887754548Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T13:53:32.909722722Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T13:53:33.114562596Z","level":"INFO","msg":"handler: closed","stream_id":"x29lgb1q"} -{"time":"2025-02-12T13:53:33.114621616Z","level":"INFO","msg":"writer: Close: closed","stream_id":"x29lgb1q"} -{"time":"2025-02-12T13:53:33.114647856Z","level":"INFO","msg":"sender: closed","stream_id":"x29lgb1q"} -{"time":"2025-02-12T13:53:33.114708545Z","level":"INFO","msg":"stream: closed","id":"x29lgb1q"} diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug.log deleted file mode 100644 index 305941888c14c138a20e3bb8a3a09cb3ea14fc13..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135331-x29lgb1q/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_setup.py:_flush():68] Configure stats pid to 230894 -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug.log -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_init.py:init():756] calling init triggers -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 13:53:31,043 INFO MainThread:230894 [wandb_init.py:init():789] starting backend -2025-02-12 13:53:31,248 INFO MainThread:230894 [wandb_init.py:init():793] sending inform_init request -2025-02-12 13:53:31,254 INFO MainThread:230894 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 13:53:31,254 INFO MainThread:230894 [wandb_init.py:init():808] backend started and connected -2025-02-12 13:53:31,255 INFO MainThread:230894 [wandb_init.py:init():901] updated telemetry -2025-02-12 13:53:31,260 INFO MainThread:230894 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 13:53:31,738 INFO MainThread:230894 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 13:53:31,846 INFO MainThread:230894 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 13:53:31,847 INFO MainThread:230894 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 13:53:31,847 INFO MainThread:230894 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 13:53:31,847 INFO MainThread:230894 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 13:53:31,848 INFO MainThread:230894 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 13:53:31,849 INFO MainThread:230894 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-53-04_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 13:53:31,852 INFO MainThread:230894 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 13:53:31,852 INFO MainThread:230894 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 13:53:31,887 WARNING MsgRouterThr:230894 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb b/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb deleted file mode 100644 index 4dde8c9fffacb3c18e046c0f82aeb24874408d5a..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb and /dev/null differ diff --git a/wandb/run-20250212_135435-53evlis5/files/config.yaml b/wandb/run-20250212_135435-53evlis5/files/config.yaml deleted file mode 100644 index 30f65cde960f35c69b0bdf95f4b9d9d2b3b1184f..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135435-53evlis5/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_13-54-12_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_135435-53evlis5/files/output.log b/wandb/run-20250212_135435-53evlis5/files/output.log deleted file mode 100644 index 93558c333a13dd8d66af57c288bf367d12bd83cc..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135435-53evlis5/files/output.log +++ /dev/null @@ -1,2 +0,0 @@ - 0%| | 0/8000 [00:00127.0.0.1:35906: use of closed network connection","id":"127.0.0.1:35906"} -{"time":"2025-02-12T13:54:38.005429072Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35906"} -{"time":"2025-02-12T13:54:38.005459881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35906"} -{"time":"2025-02-12T13:54:38.005482981Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log b/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log deleted file mode 100644 index 12101c30e8dcabd6c545c213d9abcb5c366354b2..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T13:54:36.204103004Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug-core.log"} -{"time":"2025-02-12T13:54:36.309862037Z","level":"INFO","msg":"created new stream","id":"53evlis5"} -{"time":"2025-02-12T13:54:36.309910267Z","level":"INFO","msg":"stream: started","id":"53evlis5"} -{"time":"2025-02-12T13:54:36.310009206Z","level":"INFO","msg":"writer: Do: started","stream_id":"53evlis5"} -{"time":"2025-02-12T13:54:36.310099665Z","level":"INFO","msg":"sender: started","stream_id":"53evlis5"} -{"time":"2025-02-12T13:54:36.310319183Z","level":"INFO","msg":"handler: started","stream_id":"53evlis5"} -{"time":"2025-02-12T13:54:36.609964328Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T13:54:36.758890355Z","level":"INFO","msg":"stream: closing","id":"53evlis5"} -{"time":"2025-02-12T13:54:36.758937814Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T13:54:36.759635628Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T13:54:37.773850697Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T13:54:38.005111685Z","level":"INFO","msg":"handler: closed","stream_id":"53evlis5"} -{"time":"2025-02-12T13:54:38.005175764Z","level":"INFO","msg":"sender: closed","stream_id":"53evlis5"} -{"time":"2025-02-12T13:54:38.005166594Z","level":"INFO","msg":"writer: Close: closed","stream_id":"53evlis5"} -{"time":"2025-02-12T13:54:38.005321073Z","level":"INFO","msg":"stream: closed","id":"53evlis5"} diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug.log b/wandb/run-20250212_135435-53evlis5/logs/debug.log deleted file mode 100644 index 5f43982224cb3297178d00e7d017b3a59158840e..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135435-53evlis5/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 13:54:35,983 INFO MainThread:231248 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 13:54:35,983 INFO MainThread:231248 [wandb_setup.py:_flush():68] Configure stats pid to 231248 -2025-02-12 13:54:35,983 INFO MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 13:54:35,983 INFO MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 13:54:35,983 INFO MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 13:54:35,984 INFO MainThread:231248 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug.log -2025-02-12 13:54:35,984 INFO MainThread:231248 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log -2025-02-12 13:54:35,984 INFO MainThread:231248 [wandb_init.py:init():756] calling init triggers -2025-02-12 13:54:35,984 INFO MainThread:231248 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 13:54:35,984 INFO MainThread:231248 [wandb_init.py:init():789] starting backend -2025-02-12 13:54:36,194 INFO MainThread:231248 [wandb_init.py:init():793] sending inform_init request -2025-02-12 13:54:36,200 INFO MainThread:231248 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 13:54:36,200 INFO MainThread:231248 [wandb_init.py:init():808] backend started and connected -2025-02-12 13:54:36,203 INFO MainThread:231248 [wandb_init.py:init():901] updated telemetry -2025-02-12 13:54:36,210 INFO MainThread:231248 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 13:54:36,606 INFO MainThread:231248 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 13:54:36,718 INFO MainThread:231248 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 13:54:36,718 INFO MainThread:231248 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 13:54:36,718 INFO MainThread:231248 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 13:54:36,718 INFO MainThread:231248 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 13:54:36,720 INFO MainThread:231248 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 13:54:36,721 INFO MainThread:231248 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-54-12_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 13:54:36,723 INFO MainThread:231248 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 13:54:36,723 INFO MainThread:231248 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 13:54:36,758 WARNING MsgRouterThr:231248 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb b/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb deleted file mode 100644 index 7ec7ba20b62cc55f53b817904fd528e0aeac4066..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb and /dev/null differ diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml b/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml deleted file mode 100644 index c51c497794483c0b0380f14431b8680dc5f7ba9b..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_13-56-06_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/output.log b/wandb/run-20250212_135631-dnrqwgd0/files/output.log deleted file mode 100644 index 0c7c41f13b26424a300a12e79a0240a7acd5a6a5..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/files/output.log +++ /dev/null @@ -1,23 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt b/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json deleted file mode 100644 index 65ae49a1584b5c0324cbf98ee13156b2a98ec35e..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T13:56:31.423900Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313777905664" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log deleted file mode 100644 index b147dcc35bf777332920dc3f3ca4421b26256461..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T13:56:31.233734243Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmp_1p0wl/port-231645.txt","pid":231645,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T13:56:31.237208984Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":231645} -{"time":"2025-02-12T13:56:31.237177324Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38409,"Zone":""}} -{"time":"2025-02-12T13:56:31.418128107Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:31.426328298Z","level":"INFO","msg":"handleInformInit: received","streamId":"dnrqwgd0","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:31.532291862Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"dnrqwgd0","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:31.979710518Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:31.979771727Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:31.979834686Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T13:56:31.979890276Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:32.195706105Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:38409->127.0.0.1:60084: use of closed network connection","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:33.231929311Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:33.231969701Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:60084"} -{"time":"2025-02-12T13:56:33.231990701Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log deleted file mode 100644 index dd5c1ea629fb01670d3aa33b82105add3e58fdbf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T13:56:31.426643885Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log"} -{"time":"2025-02-12T13:56:31.532231333Z","level":"INFO","msg":"created new stream","id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:31.532282222Z","level":"INFO","msg":"stream: started","id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:31.532401851Z","level":"INFO","msg":"writer: Do: started","stream_id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:31.532436711Z","level":"INFO","msg":"sender: started","stream_id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:31.53251893Z","level":"INFO","msg":"handler: started","stream_id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:31.831057361Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T13:56:31.979836506Z","level":"INFO","msg":"stream: closing","id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:31.979949235Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T13:56:31.981148335Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T13:56:33.006463404Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T13:56:33.231629384Z","level":"INFO","msg":"handler: closed","stream_id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:33.231703643Z","level":"INFO","msg":"writer: Close: closed","stream_id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:33.231748163Z","level":"INFO","msg":"sender: closed","stream_id":"dnrqwgd0"} -{"time":"2025-02-12T13:56:33.231782443Z","level":"INFO","msg":"stream: closed","id":"dnrqwgd0"} diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log deleted file mode 100644 index e628857ea58f73e686450dd9fd9e0941437c4aac..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_setup.py:_flush():68] Configure stats pid to 231645 -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_init.py:init():756] calling init triggers -2025-02-12 13:56:31,208 INFO MainThread:231645 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 13:56:31,209 INFO MainThread:231645 [wandb_init.py:init():789] starting backend -2025-02-12 13:56:31,417 INFO MainThread:231645 [wandb_init.py:init():793] sending inform_init request -2025-02-12 13:56:31,423 INFO MainThread:231645 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 13:56:31,423 INFO MainThread:231645 [wandb_init.py:init():808] backend started and connected -2025-02-12 13:56:31,425 INFO MainThread:231645 [wandb_init.py:init():901] updated telemetry -2025-02-12 13:56:31,430 INFO MainThread:231645 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 13:56:31,828 INFO MainThread:231645 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 13:56:31,939 INFO MainThread:231645 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 13:56:31,939 INFO MainThread:231645 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 13:56:31,939 INFO MainThread:231645 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 13:56:31,939 INFO MainThread:231645 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 13:56:31,941 INFO MainThread:231645 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 13:56:31,942 INFO MainThread:231645 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-56-06_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 13:56:31,944 INFO MainThread:231645 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 13:56:31,945 INFO MainThread:231645 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 13:56:31,980 WARNING MsgRouterThr:231645 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb b/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb deleted file mode 100644 index f4fa1b56219f3fbe8b9ded10000ea6f8e8711b5b..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb and /dev/null differ diff --git a/wandb/run-20250212_140456-85d9ssit/files/config.yaml b/wandb/run-20250212_140456-85d9ssit/files/config.yaml deleted file mode 100644 index 20459048deba1181e28d01917b6ee58a0b19c371..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_14-04-28_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_140456-85d9ssit/files/output.log b/wandb/run-20250212_140456-85d9ssit/files/output.log deleted file mode 100644 index 47ae9b884ed0bd7b0b1e663b294089b5065b6378..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/files/output.log +++ /dev/null @@ -1,22 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin - return self.call_event("on_epoch_begin", args, state, control) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event - result = getattr(callback, event)( - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin - if isinstance(train_dataloader.dataset, IterableDatasetShard): - ^^^^^^^^^^^^^^^^^^^^^^^^ -AttributeError: 'NoneType' object has no attribute 'dataset' diff --git a/wandb/run-20250212_140456-85d9ssit/files/requirements.txt b/wandb/run-20250212_140456-85d9ssit/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json b/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json deleted file mode 100644 index 609a64191cc1e6ded5bd0a4031c0e83b27c08926..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json +++ /dev/null @@ -1,87 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T14:04:56.751445Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--audio_column_name=audio", - "--text_column_name=sentence", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "313778016256" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json b/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json deleted file mode 100644 index 6c37fe1cbbb8aed86fd461a79642cb991e4d35cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":0}} \ No newline at end of file diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log b/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log deleted file mode 100644 index f601fa971eb613b8b904449c275d1827295ba786..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log +++ /dev/null @@ -1,14 +0,0 @@ -{"time":"2025-02-12T14:04:56.567564578Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqs28ml67/port-232359.txt","pid":232359,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T14:04:56.573119086Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":232359} -{"time":"2025-02-12T14:04:56.573060477Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41429,"Zone":""}} -{"time":"2025-02-12T14:04:56.745144471Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:56.753547786Z","level":"INFO","msg":"handleInformInit: received","streamId":"85d9ssit","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:56.859061499Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"85d9ssit","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:57.327873486Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:57.327950506Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:57.327989686Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T14:04:57.328056845Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:57.543980132Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41429->127.0.0.1:35380: use of closed network connection","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:58.65202789Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:58.65205631Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35380"} -{"time":"2025-02-12T14:04:58.65210661Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log b/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log deleted file mode 100644 index 34dfbdb326b60005e9dada05ae305ad436dfd321..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T14:04:56.753826604Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log"} -{"time":"2025-02-12T14:04:56.859010159Z","level":"INFO","msg":"created new stream","id":"85d9ssit"} -{"time":"2025-02-12T14:04:56.859052399Z","level":"INFO","msg":"stream: started","id":"85d9ssit"} -{"time":"2025-02-12T14:04:56.859127668Z","level":"INFO","msg":"writer: Do: started","stream_id":"85d9ssit"} -{"time":"2025-02-12T14:04:56.859226827Z","level":"INFO","msg":"sender: started","stream_id":"85d9ssit"} -{"time":"2025-02-12T14:04:56.859302168Z","level":"INFO","msg":"handler: started","stream_id":"85d9ssit"} -{"time":"2025-02-12T14:04:57.172644512Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T14:04:57.327994966Z","level":"INFO","msg":"stream: closing","id":"85d9ssit"} -{"time":"2025-02-12T14:04:57.328025695Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T14:04:57.328790509Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T14:04:58.425349995Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T14:04:58.651670973Z","level":"INFO","msg":"handler: closed","stream_id":"85d9ssit"} -{"time":"2025-02-12T14:04:58.651730382Z","level":"INFO","msg":"sender: closed","stream_id":"85d9ssit"} -{"time":"2025-02-12T14:04:58.651712323Z","level":"INFO","msg":"writer: Close: closed","stream_id":"85d9ssit"} -{"time":"2025-02-12T14:04:58.651855241Z","level":"INFO","msg":"stream: closed","id":"85d9ssit"} diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug.log b/wandb/run-20250212_140456-85d9ssit/logs/debug.log deleted file mode 100644 index 1164b0f163d2f03c5598f4e9dbd5df03ddd05535..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_140456-85d9ssit/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_setup.py:_flush():68] Configure stats pid to 232359 -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug.log -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_init.py:init():756] calling init triggers -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 14:04:56,535 INFO MainThread:232359 [wandb_init.py:init():789] starting backend -2025-02-12 14:04:56,745 INFO MainThread:232359 [wandb_init.py:init():793] sending inform_init request -2025-02-12 14:04:56,750 INFO MainThread:232359 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 14:04:56,751 INFO MainThread:232359 [wandb_init.py:init():808] backend started and connected -2025-02-12 14:04:56,753 INFO MainThread:232359 [wandb_init.py:init():901] updated telemetry -2025-02-12 14:04:56,760 INFO MainThread:232359 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 14:04:57,169 INFO MainThread:232359 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 14:04:57,287 INFO MainThread:232359 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 14:04:57,287 INFO MainThread:232359 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 14:04:57,287 INFO MainThread:232359 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 14:04:57,287 INFO MainThread:232359 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 14:04:57,289 INFO MainThread:232359 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 14:04:57,290 INFO MainThread:232359 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-04-28_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 14:04:57,292 INFO MainThread:232359 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 14:04:57,292 INFO MainThread:232359 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 14:04:57,328 WARNING MsgRouterThr:232359 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb b/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb deleted file mode 100644 index 6aecf6528222a0b9ca923222cc04215f30e7b215..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb and /dev/null differ diff --git a/wandb/run-20250212_144814-onbjaexn/files/config.yaml b/wandb/run-20250212_144814-onbjaexn/files/config.yaml deleted file mode 100644 index e7bf2050c408e1c177d2cf067168af9e543d1374..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_14-47-46_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_144814-onbjaexn/files/output.log b/wandb/run-20250212_144814-onbjaexn/files/output.log deleted file mode 100644 index 3a117abe7df89f3665e8ab773843e3452ad54bec..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/files/output.log +++ /dev/null @@ -1,49 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 584, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop - batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples - batch_samples += [next(epoch_iterator)] - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__ - next_batch, next_batch_info = self._fetch_batches(main_iterator) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches - batches.append(next(iterator)) - ^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch - data.append(next(self.dataset_iter)) - ^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__ - for key, example in ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter - for key, example in iterator: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__ - for key, example in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__ - for x in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter - processed_inputs = self.function(*function_args, **self.fn_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 474, in prepare_dataset - inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) - ~~~~~~^^^^^^^^^ -KeyError: 'array' diff --git a/wandb/run-20250212_144814-onbjaexn/files/requirements.txt b/wandb/run-20250212_144814-onbjaexn/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json b/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json deleted file mode 100644 index 3617f33b6a9e0d7a88e02c955cc5ba8d7786fe2e..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T14:48:14.426245Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "314421264384" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json b/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json deleted file mode 100644 index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":2}} \ No newline at end of file diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log b/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log deleted file mode 100644 index 6129efa42871aad3a2eaf1f4ed957947fcadfc81..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log +++ /dev/null @@ -1,13 +0,0 @@ -{"time":"2025-02-12T14:48:14.241936287Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpn0iuoxdb/port-235726.txt","pid":235726,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T14:48:14.269915432Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":235726} -{"time":"2025-02-12T14:48:14.269970272Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44879,"Zone":""}} -{"time":"2025-02-12T14:48:14.42013725Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:14.429435808Z","level":"INFO","msg":"handleInformInit: received","streamId":"onbjaexn","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:14.534605813Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"onbjaexn","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:17.23040957Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:17.230519299Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:17.230528979Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T14:48:17.230598258Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:18.137682794Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:18.137715544Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:38426"} -{"time":"2025-02-12T14:48:18.137735074Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log b/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log deleted file mode 100644 index 20bf3cf198ac1918e999652096a9c93e415993c3..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T14:48:14.429796675Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log"} -{"time":"2025-02-12T14:48:14.534527814Z","level":"INFO","msg":"created new stream","id":"onbjaexn"} -{"time":"2025-02-12T14:48:14.534596853Z","level":"INFO","msg":"stream: started","id":"onbjaexn"} -{"time":"2025-02-12T14:48:14.534694833Z","level":"INFO","msg":"writer: Do: started","stream_id":"onbjaexn"} -{"time":"2025-02-12T14:48:14.534760432Z","level":"INFO","msg":"handler: started","stream_id":"onbjaexn"} -{"time":"2025-02-12T14:48:14.534942391Z","level":"INFO","msg":"sender: started","stream_id":"onbjaexn"} -{"time":"2025-02-12T14:48:14.842151491Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T14:48:17.230483349Z","level":"INFO","msg":"stream: closing","id":"onbjaexn"} -{"time":"2025-02-12T14:48:17.230507079Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T14:48:17.231180494Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T14:48:17.906287174Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T14:48:18.137280277Z","level":"INFO","msg":"handler: closed","stream_id":"onbjaexn"} -{"time":"2025-02-12T14:48:18.137369966Z","level":"INFO","msg":"writer: Close: closed","stream_id":"onbjaexn"} -{"time":"2025-02-12T14:48:18.137418376Z","level":"INFO","msg":"sender: closed","stream_id":"onbjaexn"} -{"time":"2025-02-12T14:48:18.137549935Z","level":"INFO","msg":"stream: closed","id":"onbjaexn"} diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug.log b/wandb/run-20250212_144814-onbjaexn/logs/debug.log deleted file mode 100644 index ef4069012800c296bd0c14d5ed4d73d0c2467c96..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_144814-onbjaexn/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 14:48:14,212 INFO MainThread:235726 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 14:48:14,212 INFO MainThread:235726 [wandb_setup.py:_flush():68] Configure stats pid to 235726 -2025-02-12 14:48:14,212 INFO MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 14:48:14,212 INFO MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 14:48:14,212 INFO MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 14:48:14,213 INFO MainThread:235726 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug.log -2025-02-12 14:48:14,213 INFO MainThread:235726 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log -2025-02-12 14:48:14,213 INFO MainThread:235726 [wandb_init.py:init():756] calling init triggers -2025-02-12 14:48:14,213 INFO MainThread:235726 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 14:48:14,213 INFO MainThread:235726 [wandb_init.py:init():789] starting backend -2025-02-12 14:48:14,419 INFO MainThread:235726 [wandb_init.py:init():793] sending inform_init request -2025-02-12 14:48:14,425 INFO MainThread:235726 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 14:48:14,425 INFO MainThread:235726 [wandb_init.py:init():808] backend started and connected -2025-02-12 14:48:14,428 INFO MainThread:235726 [wandb_init.py:init():901] updated telemetry -2025-02-12 14:48:14,434 INFO MainThread:235726 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 14:48:14,839 INFO MainThread:235726 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 14:48:14,950 INFO MainThread:235726 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 14:48:14,950 INFO MainThread:235726 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 14:48:14,950 INFO MainThread:235726 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 14:48:14,950 INFO MainThread:235726 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 14:48:14,951 INFO MainThread:235726 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 14:48:14,953 INFO MainThread:235726 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-47-46_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 14:48:14,955 INFO MainThread:235726 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 14:48:14,955 INFO MainThread:235726 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 14:48:17,230 WARNING MsgRouterThr:235726 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb b/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb deleted file mode 100644 index 3bcda516eefb5fc320ac07ed1aa34f5c893f6e18..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb and /dev/null differ diff --git a/wandb/run-20250212_145250-7h6sh6az/files/config.yaml b/wandb/run-20250212_145250-7h6sh6az/files/config.yaml deleted file mode 100644 index 1659e855217bf787e28b7c6fcdf6754048d4f34e..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_14-52-23_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_145250-7h6sh6az/files/output.log b/wandb/run-20250212_145250-7h6sh6az/files/output.log deleted file mode 100644 index ed56b0930f3380f5d2cd8b2ba4660678179dbb05..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/files/output.log +++ /dev/null @@ -1,52 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 606, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop - batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples - batch_samples += [next(epoch_iterator)] - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__ - next_batch, next_batch_info = self._fetch_batches(main_iterator) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches - batches.append(next(iterator)) - ^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch - data.append(next(self.dataset_iter)) - ^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__ - for key, example in ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter - for key, example in iterator: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__ - for key, example in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__ - for x in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter - processed_inputs = self.function(*function_args, **self.fn_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 492, in prepare_dataset - inputs = feature_extractor(audio_array, sampling_rate=sampling_rate) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/models/whisper/feature_extraction_whisper.py", line 265, in __call__ - raw_speech = np.asarray(raw_speech, dtype=np.float32) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -ValueError: could not convert string to float: 'common_voice_eu_39287311.wav' diff --git a/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt b/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json b/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json deleted file mode 100644 index e0270308edb884f1317d7a8c8ecd6d03846cb42d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T14:52:51.028960Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "315195543552" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json b/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json deleted file mode 100644 index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":2}} \ No newline at end of file diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log deleted file mode 100644 index bdc472ce22920952bc84d9ac978de3754c21a2ea..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log +++ /dev/null @@ -1,13 +0,0 @@ -{"time":"2025-02-12T14:52:50.845987197Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp6ead6ms8/port-236505.txt","pid":236505,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T14:52:50.851144401Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":236505} -{"time":"2025-02-12T14:52:50.851121011Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45803,"Zone":""}} -{"time":"2025-02-12T14:52:51.022520498Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:51.031293581Z","level":"INFO","msg":"handleInformInit: received","streamId":"7h6sh6az","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:51.13681882Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"7h6sh6az","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:53.567639763Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:53.567714252Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:53.567766542Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T14:52:53.567883362Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:54.608402958Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:54.608420568Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:43038"} -{"time":"2025-02-12T14:52:54.608430008Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log deleted file mode 100644 index 193e726955b8de9172c8a8da3854bacc0b3770ef..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T14:52:51.031699779Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log"} -{"time":"2025-02-12T14:52:51.13677806Z","level":"INFO","msg":"created new stream","id":"7h6sh6az"} -{"time":"2025-02-12T14:52:51.13681223Z","level":"INFO","msg":"stream: started","id":"7h6sh6az"} -{"time":"2025-02-12T14:52:51.13682766Z","level":"INFO","msg":"writer: Do: started","stream_id":"7h6sh6az"} -{"time":"2025-02-12T14:52:51.136887979Z","level":"INFO","msg":"handler: started","stream_id":"7h6sh6az"} -{"time":"2025-02-12T14:52:51.136996279Z","level":"INFO","msg":"sender: started","stream_id":"7h6sh6az"} -{"time":"2025-02-12T14:52:51.40503864Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T14:52:53.567741802Z","level":"INFO","msg":"stream: closing","id":"7h6sh6az"} -{"time":"2025-02-12T14:52:53.567786892Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T14:52:53.56844074Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T14:52:54.38014778Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T14:52:54.608180109Z","level":"INFO","msg":"handler: closed","stream_id":"7h6sh6az"} -{"time":"2025-02-12T14:52:54.608239299Z","level":"INFO","msg":"writer: Close: closed","stream_id":"7h6sh6az"} -{"time":"2025-02-12T14:52:54.608252109Z","level":"INFO","msg":"sender: closed","stream_id":"7h6sh6az"} -{"time":"2025-02-12T14:52:54.608324618Z","level":"INFO","msg":"stream: closed","id":"7h6sh6az"} diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug.log deleted file mode 100644 index 9e0a5b7af23b12f958416746291b2016b27b0920..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145250-7h6sh6az/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_setup.py:_flush():68] Configure stats pid to 236505 -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug.log -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_init.py:init():756] calling init triggers -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 14:52:50,817 INFO MainThread:236505 [wandb_init.py:init():789] starting backend -2025-02-12 14:52:51,022 INFO MainThread:236505 [wandb_init.py:init():793] sending inform_init request -2025-02-12 14:52:51,028 INFO MainThread:236505 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 14:52:51,028 INFO MainThread:236505 [wandb_init.py:init():808] backend started and connected -2025-02-12 14:52:51,030 INFO MainThread:236505 [wandb_init.py:init():901] updated telemetry -2025-02-12 14:52:51,037 INFO MainThread:236505 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 14:52:51,402 INFO MainThread:236505 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 14:52:51,519 INFO MainThread:236505 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 14:52:51,519 INFO MainThread:236505 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 14:52:51,519 INFO MainThread:236505 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 14:52:51,519 INFO MainThread:236505 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 14:52:51,521 INFO MainThread:236505 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 14:52:51,522 INFO MainThread:236505 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-52-23_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 14:52:51,524 INFO MainThread:236505 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 14:52:51,524 INFO MainThread:236505 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 14:52:53,567 WARNING MsgRouterThr:236505 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb b/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb deleted file mode 100644 index 11838bad33d6e4a1a00df232467c3bd278b754a5..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb and /dev/null differ diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml b/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml deleted file mode 100644 index 1aee5532694c9c3a16dde77aeb5d364ddac0c141..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_14-54-21_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/output.log b/wandb/run-20250212_145446-ncnr0yzu/files/output.log deleted file mode 100644 index f01eb94ffe96faade97a93f96968111bebc6ef65..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/files/output.log +++ /dev/null @@ -1,52 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 602, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop - batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples - batch_samples += [next(epoch_iterator)] - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__ - next_batch, next_batch_info = self._fetch_batches(main_iterator) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches - batches.append(next(iterator)) - ^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch - data.append(next(self.dataset_iter)) - ^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__ - for key, example in ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter - for key, example in iterator: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__ - for key, example in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__ - for x in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter - processed_inputs = self.function(*function_args, **self.fn_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 484, in prepare_dataset - inputs = feature_extractor(batch[audio_column_name], sampling_rate=feature_extractor.sampling_rate) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/models/whisper/feature_extraction_whisper.py", line 265, in __call__ - raw_speech = np.asarray(raw_speech, dtype=np.float32) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -TypeError: float() argument must be a string or a real number, not 'dict' diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt b/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json deleted file mode 100644 index 5f58184ad4e6e2757e4e20e41ecb4b819e7e4daf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T14:54:46.573889Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "315195682816" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json deleted file mode 100644 index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":2}} \ No newline at end of file diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log deleted file mode 100644 index db5aa803baede20483e71c9d07c375e3db38e6fd..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log +++ /dev/null @@ -1,13 +0,0 @@ -{"time":"2025-02-12T14:54:46.391038145Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpn5cre9oi/port-236985.txt","pid":236985,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T14:54:46.395903327Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":236985} -{"time":"2025-02-12T14:54:46.395861037Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38029,"Zone":""}} -{"time":"2025-02-12T14:54:46.567245341Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:46.576515207Z","level":"INFO","msg":"handleInformInit: received","streamId":"ncnr0yzu","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:46.683228559Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ncnr0yzu","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:48.66373831Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:48.66385373Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T14:54:48.66383821Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:48.663953679Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:49.591574304Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:49.591599923Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:46988"} -{"time":"2025-02-12T14:54:49.591615933Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log deleted file mode 100644 index 4e45c5ab16d921728ca58083736e7bfa341dd2e1..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T14:54:46.576935865Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log"} -{"time":"2025-02-12T14:54:46.683143569Z","level":"INFO","msg":"created new stream","id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:46.683218799Z","level":"INFO","msg":"stream: started","id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:46.683354688Z","level":"INFO","msg":"writer: Do: started","stream_id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:46.683407538Z","level":"INFO","msg":"sender: started","stream_id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:46.683417878Z","level":"INFO","msg":"handler: started","stream_id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:46.986318334Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T14:54:48.66385817Z","level":"INFO","msg":"stream: closing","id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:48.66390572Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T14:54:48.664605317Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T14:54:49.357544434Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T14:54:49.591242244Z","level":"INFO","msg":"handler: closed","stream_id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:49.591302134Z","level":"INFO","msg":"writer: Close: closed","stream_id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:49.591331464Z","level":"INFO","msg":"sender: closed","stream_id":"ncnr0yzu"} -{"time":"2025-02-12T14:54:49.591428814Z","level":"INFO","msg":"stream: closed","id":"ncnr0yzu"} diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log deleted file mode 100644 index d1847e519db8f0646fddd46ec510ca163b76e2ca..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_setup.py:_flush():68] Configure stats pid to 236985 -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_init.py:init():756] calling init triggers -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 14:54:46,356 INFO MainThread:236985 [wandb_init.py:init():789] starting backend -2025-02-12 14:54:46,567 INFO MainThread:236985 [wandb_init.py:init():793] sending inform_init request -2025-02-12 14:54:46,573 INFO MainThread:236985 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 14:54:46,573 INFO MainThread:236985 [wandb_init.py:init():808] backend started and connected -2025-02-12 14:54:46,575 INFO MainThread:236985 [wandb_init.py:init():901] updated telemetry -2025-02-12 14:54:46,582 INFO MainThread:236985 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 14:54:46,982 INFO MainThread:236985 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 14:54:47,097 INFO MainThread:236985 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 14:54:47,097 INFO MainThread:236985 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 14:54:47,097 INFO MainThread:236985 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 14:54:47,097 INFO MainThread:236985 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 14:54:47,099 INFO MainThread:236985 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 14:54:47,100 INFO MainThread:236985 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-54-21_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 14:54:47,102 INFO MainThread:236985 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 14:54:47,102 INFO MainThread:236985 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 14:54:48,664 WARNING MsgRouterThr:236985 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb b/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb deleted file mode 100644 index 65c0bd3fd32e7d2796b16127fa20b73e04cb090e..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb and /dev/null differ diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml b/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml deleted file mode 100644 index 1b8c28c55668d6406f48f51123b6234e497287be..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml +++ /dev/null @@ -1,512 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_14-58-28_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/output.log b/wandb/run-20250212_145852-0gfsy6hh/files/output.log deleted file mode 100644 index e5c0ebecf44b6daff661d6bce1288f2c744ab14f..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/files/output.log +++ /dev/null @@ -1,49 +0,0 @@ - 0%| | 0/8000 [00:00 - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 588, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop - batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples - batch_samples += [next(epoch_iterator)] - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__ - next_batch, next_batch_info = self._fetch_batches(main_iterator) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches - batches.append(next(iterator)) - ^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch - data.append(next(self.dataset_iter)) - ^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__ - for key, example in ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter - for key, example in iterator: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__ - for key, example in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__ - for x in self.ex_iterable: - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__ - yield from self._iter() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter - processed_inputs = self.function(*function_args, **self.fn_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 477, in prepare_dataset - audio_array = sample["array"] - ~~~~~~^^^^^^^^^ -KeyError: 'array' diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt b/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json deleted file mode 100644 index 91a6670d98692edca3c7c2302ed41497a23b0b97..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T14:58:52.625032Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "315206733824" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json deleted file mode 100644 index 1d52051e315a7a21a9d9e5a40a517408bb086162..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb":{"runtime":2}} \ No newline at end of file diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log deleted file mode 100644 index 5bc552cfe42bb92f4ee190b01d46a0bd5b93b82d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log +++ /dev/null @@ -1,13 +0,0 @@ -{"time":"2025-02-12T14:58:52.442304412Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpkux429nf/port-237900.txt","pid":237900,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T14:58:52.44698044Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":237900} -{"time":"2025-02-12T14:58:52.446971151Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":42317,"Zone":""}} -{"time":"2025-02-12T14:58:52.618246003Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:52.62556589Z","level":"INFO","msg":"handleInformInit: received","streamId":"0gfsy6hh","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:52.729897747Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"0gfsy6hh","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:54.959605329Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:54.959661068Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:54.959726878Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:54.959733858Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T14:58:55.879911345Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:55.879969174Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:39202"} -{"time":"2025-02-12T14:58:55.879979154Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log deleted file mode 100644 index c315ae03c741865692c58d12afec5388b478f0a7..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T14:58:52.62568889Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log"} -{"time":"2025-02-12T14:58:52.729819547Z","level":"INFO","msg":"created new stream","id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:52.729885047Z","level":"INFO","msg":"stream: started","id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:52.729955257Z","level":"INFO","msg":"sender: started","stream_id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:52.729941107Z","level":"INFO","msg":"writer: Do: started","stream_id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:52.730185226Z","level":"INFO","msg":"handler: started","stream_id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:52.998836495Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T14:58:54.959699198Z","level":"INFO","msg":"stream: closing","id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:54.959735308Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T14:58:54.960467105Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T14:58:55.63479944Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T14:58:55.878933079Z","level":"INFO","msg":"handler: closed","stream_id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:55.878978098Z","level":"INFO","msg":"sender: closed","stream_id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:55.878984668Z","level":"INFO","msg":"writer: Close: closed","stream_id":"0gfsy6hh"} -{"time":"2025-02-12T14:58:55.879264427Z","level":"INFO","msg":"stream: closed","id":"0gfsy6hh"} diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log deleted file mode 100644 index b6929aba4219df8569a99e62bdfd8415b77c4a11..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_setup.py:_flush():68] Configure stats pid to 237900 -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_init.py:init():756] calling init triggers -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 14:58:52,407 INFO MainThread:237900 [wandb_init.py:init():789] starting backend -2025-02-12 14:58:52,618 INFO MainThread:237900 [wandb_init.py:init():793] sending inform_init request -2025-02-12 14:58:52,624 INFO MainThread:237900 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 14:58:52,624 INFO MainThread:237900 [wandb_init.py:init():808] backend started and connected -2025-02-12 14:58:52,627 INFO MainThread:237900 [wandb_init.py:init():901] updated telemetry -2025-02-12 14:58:52,634 INFO MainThread:237900 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 14:58:52,995 INFO MainThread:237900 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 14:58:53,107 INFO MainThread:237900 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 14:58:53,107 INFO MainThread:237900 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 14:58:53,107 INFO MainThread:237900 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 14:58:53,107 INFO MainThread:237900 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 14:58:53,108 INFO MainThread:237900 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 14:58:53,110 INFO MainThread:237900 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-58-28_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 14:58:53,112 INFO MainThread:237900 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 14:58:53,112 INFO MainThread:237900 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 14:58:54,959 WARNING MsgRouterThr:237900 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb b/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb deleted file mode 100644 index 38232a1d6938c32ba68a2c2ebc20cb3a1500e7dc..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb and /dev/null differ diff --git a/wandb/run-20250212_152506-cp47eoxt/files/config.yaml b/wandb/run-20250212_152506-cp47eoxt/files/config.yaml deleted file mode 100644 index 8ad19f35aa9b0b821482834434adf1e4e566842e..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/files/config.yaml +++ /dev/null @@ -1,536 +0,0 @@ -_attn_implementation_autoset: - value: true -_name_or_path: - value: openai/whisper-small -_wandb: - value: - cli_version: 0.19.6 - m: - - "1": train/global_step - "6": - - 3 - "7": [] - - "1": train/loss - "5": 1 - "6": - - 1 - - 3 - "7": [] - - "1": train/grad_norm - "5": 1 - "6": - - 1 - - 3 - "7": [] - - "1": train/learning_rate - "5": 1 - "6": - - 1 - - 3 - "7": [] - - "1": train/epoch - "5": 1 - "6": - - 1 - - 3 - "7": [] - python_version: 3.12.3 - t: - "1": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "2": - - 1 - - 5 - - 11 - - 49 - - 51 - - 53 - - 55 - - 71 - - 100 - "3": - - 7 - - 13 - - 19 - - 23 - - 55 - - 66 - "4": 3.12.3 - "5": 0.19.6 - "6": 4.49.0.dev0 - "8": - - 5 - "9": - "1": transformers_trainer - "12": 0.19.6 - "13": linux-x86_64 -accelerator_config: - value: - dispatch_batches: null - even_batches: true - gradient_accumulation_kwargs: null - non_blocking: false - split_batches: false - use_seedable_sampler: true -activation_dropout: - value: 0 -activation_function: - value: gelu -adafactor: - value: false -adam_beta1: - value: 0.9 -adam_beta2: - value: 0.999 -adam_epsilon: - value: 1e-08 -add_cross_attention: - value: false -apply_spec_augment: - value: false -architectures: - value: - - WhisperForConditionalGeneration -attention_dropout: - value: 0 -auto_find_batch_size: - value: false -average_tokens_across_devices: - value: false -bad_words_ids: - value: null -batch_eval_metrics: - value: false -begin_suppress_tokens: - value: - - 220 - - 50257 -bf16: - value: false -bf16_full_eval: - value: false -bos_token_id: - value: 50257 -chunk_size_feed_forward: - value: 0 -classifier_proj_size: - value: 256 -cross_attention_hidden_size: - value: null -d_model: - value: 768 -data_seed: - value: null -dataloader_drop_last: - value: false -dataloader_num_workers: - value: 0 -dataloader_persistent_workers: - value: false -dataloader_pin_memory: - value: true -dataloader_prefetch_factor: - value: null -ddp_backend: - value: null -ddp_broadcast_buffers: - value: null -ddp_bucket_cap_mb: - value: null -ddp_find_unused_parameters: - value: null -ddp_timeout: - value: 1800 -debug: - value: [] -decoder_attention_heads: - value: 12 -decoder_ffn_dim: - value: 3072 -decoder_layerdrop: - value: 0 -decoder_layers: - value: 12 -decoder_start_token_id: - value: 50258 -deepspeed: - value: null -disable_tqdm: - value: false -dispatch_batches: - value: null -diversity_penalty: - value: 0 -do_eval: - value: true -do_predict: - value: false -do_sample: - value: false -do_train: - value: true -dropout: - value: 0 -early_stopping: - value: false -encoder_attention_heads: - value: 12 -encoder_ffn_dim: - value: 3072 -encoder_layerdrop: - value: 0 -encoder_layers: - value: 12 -encoder_no_repeat_ngram_size: - value: 0 -eos_token_id: - value: 50257 -eval_accumulation_steps: - value: null -eval_delay: - value: 0 -eval_do_concat_batches: - value: true -eval_on_start: - value: false -eval_steps: - value: 1000 -eval_strategy: - value: steps -eval_use_gather_object: - value: false -evaluation_strategy: - value: steps -exponential_decay_length_penalty: - value: null -finetuning_task: - value: null -forced_bos_token_id: - value: null -forced_decoder_ids: - value: null -forced_eos_token_id: - value: null -fp16: - value: true -fp16_backend: - value: auto -fp16_full_eval: - value: false -fp16_opt_level: - value: O1 -fsdp: - value: [] -fsdp_config: - value: - min_num_params: 0 - xla: false - xla_fsdp_grad_ckpt: false - xla_fsdp_v2: false -fsdp_min_num_params: - value: 0 -fsdp_transformer_layer_cls_to_wrap: - value: null -full_determinism: - value: false -generation_config: - value: null -generation_max_length: - value: 225 -generation_num_beams: - value: null -gradient_accumulation_steps: - value: 1 -gradient_checkpointing: - value: true -gradient_checkpointing_kwargs: - value: null -greater_is_better: - value: false -group_by_length: - value: false -half_precision_backend: - value: auto -hub_always_push: - value: false -hub_model_id: - value: null -hub_private_repo: - value: null -hub_strategy: - value: every_save -hub_token: - value: -id2label: - value: - "0": LABEL_0 - "1": LABEL_1 -ignore_data_skip: - value: false -include_for_metrics: - value: [] -include_inputs_for_metrics: - value: false -include_num_input_tokens_seen: - value: false -include_tokens_per_second: - value: false -init_std: - value: 0.02 -is_decoder: - value: false -is_encoder_decoder: - value: true -jit_mode_eval: - value: false -label_names: - value: null -label_smoothing_factor: - value: 0 -label2id: - value: - LABEL_0: 0 - LABEL_1: 1 -learning_rate: - value: 1e-05 -length_column_name: - value: input_length -length_penalty: - value: 1 -load_best_model_at_end: - value: true -local_rank: - value: 0 -log_level: - value: passive -log_level_replica: - value: warning -log_on_each_node: - value: true -logging_dir: - value: ./runs/Feb12_15-24-15_tknika -logging_first_step: - value: false -logging_nan_inf_filter: - value: true -logging_steps: - value: 25 -logging_strategy: - value: steps -lr_scheduler_type: - value: linear -mask_feature_length: - value: 10 -mask_feature_min_masks: - value: 0 -mask_feature_prob: - value: 0 -mask_time_length: - value: 10 -mask_time_min_masks: - value: 2 -mask_time_prob: - value: 0.05 -max_grad_norm: - value: 1 -max_length: - value: 448 -max_source_positions: - value: 1500 -max_steps: - value: 8000 -max_target_positions: - value: 448 -median_filter_width: - value: 7 -metric_for_best_model: - value: wer -min_length: - value: 0 -model/num_parameters: - value: 241734912 -model_type: - value: whisper -mp_parameters: - value: "" -neftune_noise_alpha: - value: null -no_cuda: - value: false -no_repeat_ngram_size: - value: 0 -num_beam_groups: - value: 1 -num_beams: - value: 1 -num_hidden_layers: - value: 12 -num_mel_bins: - value: 80 -num_return_sequences: - value: 1 -num_train_epochs: - value: 3 -optim: - value: adamw_torch -optim_args: - value: null -optim_target_modules: - value: null -output_attentions: - value: false -output_dir: - value: ./ -output_hidden_states: - value: false -output_scores: - value: false -overwrite_output_dir: - value: true -pad_token_id: - value: 50257 -past_index: - value: -1 -per_device_eval_batch_size: - value: 16 -per_device_train_batch_size: - value: 32 -per_gpu_eval_batch_size: - value: null -per_gpu_train_batch_size: - value: null -predict_with_generate: - value: true -prediction_loss_only: - value: false -prefix: - value: null -problem_type: - value: null -push_to_hub: - value: true -push_to_hub_model_id: - value: null -push_to_hub_organization: - value: null -push_to_hub_token: - value: -ray_scope: - value: last -remove_invalid_values: - value: false -remove_unused_columns: - value: true -repetition_penalty: - value: 1 -report_to: - value: - - wandb -restore_callback_states_from_checkpoint: - value: false -resume_from_checkpoint: - value: null -return_dict: - value: true -return_dict_in_generate: - value: false -run_name: - value: whisper-small-eu -save_on_each_node: - value: false -save_only_model: - value: false -save_safetensors: - value: true -save_steps: - value: 1000 -save_strategy: - value: steps -save_total_limit: - value: null -scale_embedding: - value: false -seed: - value: 42 -sep_token_id: - value: null -skip_memory_metrics: - value: true -sortish_sampler: - value: false -split_batches: - value: null -suppress_tokens: - value: null -task_specific_params: - value: null -temperature: - value: 1 -tf_legacy_loss: - value: false -tf32: - value: null -tie_encoder_decoder: - value: false -tie_word_embeddings: - value: true -tokenizer_class: - value: null -top_k: - value: 50 -top_p: - value: 1 -torch_compile: - value: false -torch_compile_backend: - value: null -torch_compile_mode: - value: null -torch_dtype: - value: float32 -torch_empty_cache_steps: - value: null -torchdynamo: - value: null -torchscript: - value: false -tpu_metrics_debug: - value: false -tpu_num_cores: - value: null -transformers_version: - value: 4.49.0.dev0 -typical_p: - value: 1 -use_bfloat16: - value: false -use_cache: - value: false -use_cpu: - value: false -use_ipex: - value: false -use_legacy_prediction_loop: - value: false -use_liger_kernel: - value: false -use_mps_device: - value: false -use_weighted_layer_sum: - value: false -vocab_size: - value: 51865 -warmup_ratio: - value: 0 -warmup_steps: - value: 500 -weight_decay: - value: 0 diff --git a/wandb/run-20250212_152506-cp47eoxt/files/output.log b/wandb/run-20250212_152506-cp47eoxt/files/output.log deleted file mode 100644 index d5181e7d9ff6e24c3fb944c5af3fc84ec3922a2c..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/files/output.log +++ /dev/null @@ -1,28 +0,0 @@ - 0%| | 0/8000 [00:00> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. - 0%|▏ | 25/8000 [00:33<2:23:47, 1.08s/it]Traceback (most recent call last): -{'loss': 2.3284, 'grad_norm': 17.581905364990234, 'learning_rate': 4.4e-07, 'epoch': 0.0} - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 719, in - main() - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 668, in main - train_result = trainer.train(resume_from_checkpoint=checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train - return inner_training_loop( - ^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2512, in _inner_training_loop - tr_loss_step = self.training_step(model, inputs, num_items_in_batch) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 3662, in training_step - self.accelerator.backward(loss, **kwargs) - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/accelerator.py", line 2242, in backward - self.scaler.scale(loss).backward(**kwargs) - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/_tensor.py", line 626, in backward - torch.autograd.backward( - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/autograd/__init__.py", line 347, in backward - _engine_run_backward( - File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/autograd/graph.py", line 823, in _engine_run_backward - return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -KeyboardInterrupt diff --git a/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt b/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json b/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json deleted file mode 100644 index 741ca0342dc8378ca92566276e75c09582efae0d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T15:25:06.501811Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--audio_column_name=audio", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "315485667328" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json b/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json deleted file mode 100644 index 80f564210f160830a713856b51172daf9a4d37cf..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_step":0,"train/grad_norm":17.581905364990234,"train/learning_rate":4.4e-07,"train/epoch":0.003125,"train/global_step":25,"_timestamp":1.7393739409734626e+09,"_wandb":{"runtime":35},"_runtime":34.472001053,"train/loss":2.3284} \ No newline at end of file diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log deleted file mode 100644 index 988f517348f024820b907a90b8994b2652345846..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log +++ /dev/null @@ -1,13 +0,0 @@ -{"time":"2025-02-12T15:25:06.32070089Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_o_4dslg/port-242535.txt","pid":242535,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T15:25:06.325654679Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":242535} -{"time":"2025-02-12T15:25:06.325624039Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34421,"Zone":""}} -{"time":"2025-02-12T15:25:06.495145129Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:06.504420484Z","level":"INFO","msg":"handleInformInit: received","streamId":"cp47eoxt","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:06.610655359Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"cp47eoxt","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:41.518158713Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:41.518235362Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:41.518248342Z","level":"INFO","msg":"server is shutting down"} -{"time":"2025-02-12T15:25:41.518365211Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:42.440985993Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:42.441028483Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35824"} -{"time":"2025-02-12T15:25:42.441053643Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log deleted file mode 100644 index ddd19b9298efd912885b30af4c20522b4d39c052..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log +++ /dev/null @@ -1,15 +0,0 @@ -{"time":"2025-02-12T15:25:06.504719321Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log"} -{"time":"2025-02-12T15:25:06.61058157Z","level":"INFO","msg":"created new stream","id":"cp47eoxt"} -{"time":"2025-02-12T15:25:06.61064572Z","level":"INFO","msg":"stream: started","id":"cp47eoxt"} -{"time":"2025-02-12T15:25:06.610715339Z","level":"INFO","msg":"writer: Do: started","stream_id":"cp47eoxt"} -{"time":"2025-02-12T15:25:06.610734969Z","level":"INFO","msg":"handler: started","stream_id":"cp47eoxt"} -{"time":"2025-02-12T15:25:06.610881007Z","level":"INFO","msg":"sender: started","stream_id":"cp47eoxt"} -{"time":"2025-02-12T15:25:06.883150548Z","level":"INFO","msg":"Starting system monitor"} -{"time":"2025-02-12T15:25:41.518262532Z","level":"INFO","msg":"stream: closing","id":"cp47eoxt"} -{"time":"2025-02-12T15:25:41.518304801Z","level":"INFO","msg":"Stopping system monitor"} -{"time":"2025-02-12T15:25:41.519096684Z","level":"INFO","msg":"Stopped system monitor"} -{"time":"2025-02-12T15:25:42.16914698Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-02-12T15:25:42.440671227Z","level":"INFO","msg":"handler: closed","stream_id":"cp47eoxt"} -{"time":"2025-02-12T15:25:42.440734176Z","level":"INFO","msg":"writer: Close: closed","stream_id":"cp47eoxt"} -{"time":"2025-02-12T15:25:42.440750356Z","level":"INFO","msg":"sender: closed","stream_id":"cp47eoxt"} -{"time":"2025-02-12T15:25:42.440859685Z","level":"INFO","msg":"stream: closed","id":"cp47eoxt"} diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug.log deleted file mode 100644 index 5a044527c50b212ff0a9c5959254662b8900d25c..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152506-cp47eoxt/logs/debug.log +++ /dev/null @@ -1,26 +0,0 @@ -2025-02-12 15:25:06,284 INFO MainThread:242535 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 15:25:06,284 INFO MainThread:242535 [wandb_setup.py:_flush():68] Configure stats pid to 242535 -2025-02-12 15:25:06,284 INFO MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 15:25:06,284 INFO MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 15:25:06,284 INFO MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 15:25:06,284 INFO MainThread:242535 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug.log -2025-02-12 15:25:06,285 INFO MainThread:242535 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log -2025-02-12 15:25:06,285 INFO MainThread:242535 [wandb_init.py:init():756] calling init triggers -2025-02-12 15:25:06,285 INFO MainThread:242535 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 15:25:06,285 INFO MainThread:242535 [wandb_init.py:init():789] starting backend -2025-02-12 15:25:06,495 INFO MainThread:242535 [wandb_init.py:init():793] sending inform_init request -2025-02-12 15:25:06,501 INFO MainThread:242535 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 15:25:06,501 INFO MainThread:242535 [wandb_init.py:init():808] backend started and connected -2025-02-12 15:25:06,503 INFO MainThread:242535 [wandb_init.py:init():901] updated telemetry -2025-02-12 15:25:06,511 INFO MainThread:242535 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 15:25:06,880 INFO MainThread:242535 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 15:25:06,988 INFO MainThread:242535 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 15:25:06,988 INFO MainThread:242535 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 15:25:06,988 INFO MainThread:242535 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 15:25:06,988 INFO MainThread:242535 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 15:25:06,990 INFO MainThread:242535 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 15:25:06,991 INFO MainThread:242535 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-24-15_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 15:25:06,993 INFO MainThread:242535 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 15:25:06,993 INFO MainThread:242535 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None -2025-02-12 15:25:41,518 WARNING MsgRouterThr:242535 [router.py:message_loop():75] message_loop has been closed diff --git a/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb b/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb deleted file mode 100644 index cbcfbc2adf86bb17c7862166061d04cf4fb5218e..0000000000000000000000000000000000000000 Binary files a/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb and /dev/null differ diff --git a/wandb/run-20250212_152709-lejyafmi/files/output.log b/wandb/run-20250212_152709-lejyafmi/files/output.log deleted file mode 100644 index 7cd9b7d4875569e33ea86d709a8eee55be82c025..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152709-lejyafmi/files/output.log +++ /dev/null @@ -1,1782 +0,0 @@ - 0%| | 0/8000 [00:00> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. - -{'loss': 2.3284, 'grad_norm': 17.579944610595703, 'learning_rate': 4.4e-07, 'epoch': 0.0} -{'loss': 1.9145, 'grad_norm': 9.753120422363281, 'learning_rate': 9.400000000000001e-07, 'epoch': 0.01} -{'loss': 1.2892, 'grad_norm': 9.469987869262695, 'learning_rate': 1.44e-06, 'epoch': 0.01} -{'loss': 0.9797, 'grad_norm': 6.952774524688721, 'learning_rate': 1.94e-06, 'epoch': 0.01} -{'loss': 0.8265, 'grad_norm': 6.080902576446533, 'learning_rate': 2.4400000000000004e-06, 'epoch': 0.02} -{'loss': 0.6998, 'grad_norm': 5.6766037940979, 'learning_rate': 2.9400000000000002e-06, 'epoch': 0.02} -{'loss': 0.6537, 'grad_norm': 5.372249126434326, 'learning_rate': 3.44e-06, 'epoch': 0.02} -{'loss': 0.6149, 'grad_norm': 5.710323810577393, 'learning_rate': 3.94e-06, 'epoch': 0.03} -{'loss': 0.5256, 'grad_norm': 5.235953330993652, 'learning_rate': 4.440000000000001e-06, 'epoch': 0.03} -{'loss': 0.54, 'grad_norm': 6.58635950088501, 'learning_rate': 4.94e-06, 'epoch': 0.03} -{'loss': 0.5521, 'grad_norm': 5.4912004470825195, 'learning_rate': 5.4400000000000004e-06, 'epoch': 0.03} -{'loss': 0.5379, 'grad_norm': 5.846869945526123, 'learning_rate': 5.94e-06, 'epoch': 0.04} -{'loss': 0.4778, 'grad_norm': 5.060309410095215, 'learning_rate': 6.440000000000001e-06, 'epoch': 0.04} -{'loss': 0.4152, 'grad_norm': 5.06487512588501, 'learning_rate': 6.9400000000000005e-06, 'epoch': 0.04} -{'loss': 0.3547, 'grad_norm': 4.936045169830322, 'learning_rate': 7.440000000000001e-06, 'epoch': 0.05} -{'loss': 0.3428, 'grad_norm': 3.8072471618652344, 'learning_rate': 7.94e-06, 'epoch': 0.05} -{'loss': 0.3099, 'grad_norm': 3.9378795623779297, 'learning_rate': 8.44e-06, 'epoch': 0.05} -{'loss': 0.2963, 'grad_norm': 3.732869863510132, 'learning_rate': 8.94e-06, 'epoch': 0.06} -{'loss': 0.2745, 'grad_norm': 3.9596025943756104, 'learning_rate': 9.440000000000001e-06, 'epoch': 0.06} -{'loss': 0.2626, 'grad_norm': 3.428398370742798, 'learning_rate': 9.940000000000001e-06, 'epoch': 0.06} -{'loss': 0.2411, 'grad_norm': 5.03747034072876, 'learning_rate': 9.970666666666668e-06, 'epoch': 0.07} -{'loss': 0.2389, 'grad_norm': 3.2012217044830322, 'learning_rate': 9.937333333333334e-06, 'epoch': 0.07} -{'loss': 0.2217, 'grad_norm': 3.7361278533935547, 'learning_rate': 9.904e-06, 'epoch': 0.07} -{'loss': 0.2246, 'grad_norm': 4.509885787963867, 'learning_rate': 9.870666666666667e-06, 'epoch': 0.07} -{'loss': 0.199, 'grad_norm': 3.462961435317993, 'learning_rate': 9.837333333333335e-06, 'epoch': 0.08} -{'loss': 0.2156, 'grad_norm': 2.764691114425659, 'learning_rate': 9.804000000000001e-06, 'epoch': 0.08} -{'loss': 0.212, 'grad_norm': 3.059408187866211, 'learning_rate': 9.770666666666668e-06, 'epoch': 0.08} -{'loss': 0.2123, 'grad_norm': 3.952425718307495, 'learning_rate': 9.737333333333334e-06, 'epoch': 0.09} -{'loss': 0.2343, 'grad_norm': 4.892609119415283, 'learning_rate': 9.704e-06, 'epoch': 0.09} -{'loss': 0.3308, 'grad_norm': 4.592615127563477, 'learning_rate': 9.670666666666667e-06, 'epoch': 0.09} -{'loss': 0.3146, 'grad_norm': 4.663967132568359, 'learning_rate': 9.637333333333333e-06, 'epoch': 0.1} -{'loss': 0.3519, 'grad_norm': 5.091048717498779, 'learning_rate': 9.604000000000002e-06, 'epoch': 0.1} -{'loss': 0.2365, 'grad_norm': 3.8216071128845215, 'learning_rate': 9.570666666666666e-06, 'epoch': 0.1} -{'loss': 0.193, 'grad_norm': 3.122516393661499, 'learning_rate': 9.537333333333334e-06, 'epoch': 0.11} -{'loss': 0.1759, 'grad_norm': 2.657339096069336, 'learning_rate': 9.504e-06, 'epoch': 0.11} -{'loss': 0.2387, 'grad_norm': 4.554510116577148, 'learning_rate': 9.470666666666667e-06, 'epoch': 0.11} -{'loss': 0.2845, 'grad_norm': 5.045220851898193, 'learning_rate': 9.437333333333334e-06, 'epoch': 0.12} -{'loss': 0.2755, 'grad_norm': 4.260054588317871, 'learning_rate': 9.404e-06, 'epoch': 0.12} -{'loss': 0.481, 'grad_norm': 5.8209147453308105, 'learning_rate': 9.370666666666668e-06, 'epoch': 0.12} -{'loss': 0.3998, 'grad_norm': 5.498444557189941, 'learning_rate': 9.337333333333335e-06, 'epoch': 0.12} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 15:47:16,534 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 15:47:16,534 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 15:47:24,994 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[WARNING|logging.py:329] 2025-02-12 15:47:25,085 >> Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:25,197 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[WARNING|logging.py:329] 2025-02-12 15:47:25,198 >> The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:26,487 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:27,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:28,903 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:29,960 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:31,125 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:32,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:33,355 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:34,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:35,741 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:36,871 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:37,975 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:39,025 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:39,971 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:41,203 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:42,188 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:43,134 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:44,323 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:45,270 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:46,187 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:47,189 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:48,181 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:49,143 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:50,185 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:51,161 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:52,183 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:53,261 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:54,446 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:55,377 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:56,390 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:57,470 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:58,505 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:47:59,581 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:00,601 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:01,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:02,774 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:03,761 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:04,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:05,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:06,875 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:07,888 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:08,928 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:09,869 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:10,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:11,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:12,891 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:13,888 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:14,855 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:15,977 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:16,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:19,165 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:20,241 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:21,179 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:22,187 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:23,191 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:24,292 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:25,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:26,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:27,247 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:28,207 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:29,289 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:30,296 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:31,338 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:32,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:33,286 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:34,193 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:35,205 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:36,200 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:37,273 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:38,271 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:39,272 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:40,397 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:41,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:42,511 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:43,561 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:44,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:45,566 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:46,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:47,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:48,586 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:49,564 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:50,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:51,564 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:52,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:53,647 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:54,606 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:55,632 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:56,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:57,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:58,939 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:48:59,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:00,958 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:01,925 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:02,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:04,023 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:05,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:06,086 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:07,100 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:08,098 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:09,138 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:10,195 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:11,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:12,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:13,333 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:14,460 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:15,491 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:16,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:17,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:18,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:19,544 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:20,575 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:21,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:22,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:23,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:24,520 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:25,491 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:26,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:27,608 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:28,604 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:29,597 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:30,632 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:31,579 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:32,624 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:33,643 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:34,623 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:35,589 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:36,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:37,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:38,570 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:39,576 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:40,578 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 15:49:41,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. - 12%|████████████████▎ | 1000/8000 [22:39<2:10:54, 1.12s/it][INFO|trainer.py:3860] 2025-02-12 15:49:49,799 >> Saving model checkpoint to ./checkpoint-1000 -{'eval_loss': 0.36512792110443115, 'eval_wer': 21.50135552023932, 'eval_runtime': 153.2646, 'eval_samples_per_second': 13.728, 'eval_steps_per_second': 0.861, 'epoch': 0.12} -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:2810: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config. - warnings.warn( -[INFO|configuration_utils.py:423] 2025-02-12 15:49:49,801 >> Configuration saved in ./checkpoint-1000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 15:49:49,802 >> Configuration saved in ./checkpoint-1000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 15:49:51,193 >> Model weights saved in ./checkpoint-1000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 15:49:51,195 >> Feature extractor saved in ./checkpoint-1000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 15:49:54,577 >> Feature extractor saved in ./preprocessor_config.json -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs}) - 13%|████████████████▏ | 1001/8000 [22:48<96:09:09, 49.46s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 15:49:59 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. - 13%|████████████████▍ | 1015/8000 [23:04<2:58:40, 1.53s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( - -{'loss': 0.329, 'grad_norm': 4.732964515686035, 'learning_rate': 9.304000000000001e-06, 'epoch': 0.13} -{'loss': 0.2319, 'grad_norm': 3.3556125164031982, 'learning_rate': 9.270666666666667e-06, 'epoch': 0.13} -{'loss': 0.174, 'grad_norm': 2.9708847999572754, 'learning_rate': 9.237333333333334e-06, 'epoch': 0.13} -{'loss': 0.1447, 'grad_norm': 2.841306447982788, 'learning_rate': 9.204e-06, 'epoch': 0.14} -{'loss': 0.1406, 'grad_norm': 2.7909176349639893, 'learning_rate': 9.170666666666668e-06, 'epoch': 0.14} -{'loss': 0.151, 'grad_norm': 3.37842059135437, 'learning_rate': 9.137333333333333e-06, 'epoch': 0.14} -{'loss': 0.1529, 'grad_norm': 3.023977041244507, 'learning_rate': 9.104000000000001e-06, 'epoch': 0.15} -{'loss': 0.1496, 'grad_norm': 3.015974283218384, 'learning_rate': 9.070666666666668e-06, 'epoch': 0.15} -{'loss': 0.219, 'grad_norm': 4.30889892578125, 'learning_rate': 9.037333333333334e-06, 'epoch': 0.15} -{'loss': 0.238, 'grad_norm': 4.160729885101318, 'learning_rate': 9.004e-06, 'epoch': 0.16} -{'loss': 0.2603, 'grad_norm': 4.687659740447998, 'learning_rate': 8.970666666666667e-06, 'epoch': 0.16} -{'loss': 0.2666, 'grad_norm': 4.577232837677002, 'learning_rate': 8.937333333333335e-06, 'epoch': 0.16} -{'loss': 0.2337, 'grad_norm': 5.091732501983643, 'learning_rate': 8.904e-06, 'epoch': 0.17} -{'loss': 0.2379, 'grad_norm': 4.125801086425781, 'learning_rate': 8.870666666666668e-06, 'epoch': 0.17} -{'loss': 0.2215, 'grad_norm': 5.142183303833008, 'learning_rate': 8.837333333333334e-06, 'epoch': 0.17} -{'loss': 0.2136, 'grad_norm': 4.486277103424072, 'learning_rate': 8.804e-06, 'epoch': 0.17} -{'loss': 0.2214, 'grad_norm': 3.5466482639312744, 'learning_rate': 8.770666666666667e-06, 'epoch': 0.18} -{'loss': 0.2113, 'grad_norm': 3.6199097633361816, 'learning_rate': 8.737333333333334e-06, 'epoch': 0.18} -{'loss': 0.1552, 'grad_norm': 2.559951066970825, 'learning_rate': 8.704e-06, 'epoch': 0.18} -{'loss': 0.1354, 'grad_norm': 2.9152133464813232, 'learning_rate': 8.670666666666666e-06, 'epoch': 0.19} -{'loss': 0.144, 'grad_norm': 2.608732223510742, 'learning_rate': 8.637333333333335e-06, 'epoch': 0.19} -{'loss': 0.1367, 'grad_norm': 4.0043416023254395, 'learning_rate': 8.604000000000001e-06, 'epoch': 0.19} -{'loss': 0.1194, 'grad_norm': 2.3621206283569336, 'learning_rate': 8.570666666666667e-06, 'epoch': 0.2} -{'loss': 0.1283, 'grad_norm': 2.6970181465148926, 'learning_rate': 8.537333333333334e-06, 'epoch': 0.2} -{'loss': 0.1858, 'grad_norm': 4.737370014190674, 'learning_rate': 8.504000000000002e-06, 'epoch': 0.2} -{'loss': 0.1995, 'grad_norm': 3.462738513946533, 'learning_rate': 8.470666666666667e-06, 'epoch': 0.21} -{'loss': 0.2028, 'grad_norm': 4.608364582061768, 'learning_rate': 8.437333333333335e-06, 'epoch': 0.21} -{'loss': 0.1952, 'grad_norm': 2.770601987838745, 'learning_rate': 8.404000000000001e-06, 'epoch': 0.21} -{'loss': 0.1464, 'grad_norm': 3.041656017303467, 'learning_rate': 8.370666666666668e-06, 'epoch': 0.22} -{'loss': 0.1424, 'grad_norm': 2.988032102584839, 'learning_rate': 8.337333333333334e-06, 'epoch': 0.22} -{'loss': 0.1233, 'grad_norm': 3.0646026134490967, 'learning_rate': 8.304e-06, 'epoch': 0.22} -{'loss': 0.1384, 'grad_norm': 2.617403268814087, 'learning_rate': 8.270666666666667e-06, 'epoch': 0.23} -{'loss': 0.1208, 'grad_norm': 2.6170425415039062, 'learning_rate': 8.237333333333333e-06, 'epoch': 0.23} -{'loss': 0.1176, 'grad_norm': 2.1296098232269287, 'learning_rate': 8.204000000000001e-06, 'epoch': 0.23} -{'loss': 0.1189, 'grad_norm': 2.767275810241699, 'learning_rate': 8.170666666666668e-06, 'epoch': 0.23} -{'loss': 0.1211, 'grad_norm': 2.7053661346435547, 'learning_rate': 8.137333333333334e-06, 'epoch': 0.24} -{'loss': 0.1156, 'grad_norm': 2.281399965286255, 'learning_rate': 8.104e-06, 'epoch': 0.24} -{'loss': 0.1517, 'grad_norm': 3.7013635635375977, 'learning_rate': 8.070666666666667e-06, 'epoch': 0.24} -{'loss': 0.2002, 'grad_norm': 3.7125532627105713, 'learning_rate': 8.037333333333334e-06, 'epoch': 0.25} -{'loss': 0.1975, 'grad_norm': 3.8716859817504883, 'learning_rate': 8.004e-06, 'epoch': 0.25} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 16:09:35,224 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 16:09:35,311 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 16:09:44,012 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:44,213 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:45,353 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:46,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:48,009 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:49,122 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:50,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:51,439 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:52,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:53,705 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:54,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:55,943 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:56,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:57,978 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:58,906 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:09:59,962 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:00,881 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:01,789 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:02,826 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:03,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:04,701 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:05,692 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:06,679 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:07,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:08,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:09,613 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:10,609 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:11,670 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:12,812 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:13,765 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:14,780 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:15,841 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:16,909 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:17,972 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:18,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:20,066 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:21,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:22,101 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:23,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:24,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:26,400 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:27,375 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:28,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:29,342 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:30,332 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:31,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:32,379 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:33,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:34,359 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:35,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:36,544 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:37,617 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:38,677 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:39,605 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:40,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:41,648 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:42,760 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:43,678 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:44,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:45,707 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:46,693 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:47,755 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:48,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:49,767 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:50,800 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:51,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:52,626 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:53,649 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:54,655 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:55,662 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:56,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:57,658 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:58,766 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:10:59,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:00,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:01,876 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:02,925 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:03,865 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:04,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:05,883 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:06,855 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:07,827 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:08,786 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:09,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:10,854 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:11,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:12,879 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:13,904 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:15,016 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:16,066 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:17,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:18,230 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:19,257 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:20,246 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:21,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:22,318 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:23,359 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:24,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:25,394 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:26,375 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:27,419 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:28,474 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:29,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:30,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:31,620 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:32,753 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:33,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:34,847 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:35,944 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:36,936 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:37,905 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:38,956 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:40,009 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:41,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:41,938 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:42,935 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:43,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:44,962 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:46,048 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:47,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:48,088 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:49,116 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:50,045 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:51,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:52,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:53,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:54,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:55,060 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:56,087 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:57,119 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:58,152 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:11:59,142 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:12:00,092 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. - 25%|████████████████████████████████▌ | 2000/8000 [44:57<1:52:37, 1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:12:08,401 >> Saving model checkpoint to ./checkpoint-2000 -{'eval_loss': 0.2918355464935303, 'eval_wer': 15.873609423202767, 'eval_runtime': 153.1763, 'eval_samples_per_second': 13.736, 'eval_steps_per_second': 0.862, 'epoch': 0.25} -[INFO|configuration_utils.py:423] 2025-02-12 16:12:08,403 >> Configuration saved in ./checkpoint-2000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 16:12:08,403 >> Configuration saved in ./checkpoint-2000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 16:12:09,828 >> Model weights saved in ./checkpoint-2000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 16:12:09,829 >> Feature extractor saved in ./checkpoint-2000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 16:12:13,102 >> Feature extractor saved in ./preprocessor_config.json -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs}) - 25%|████████████████████████████████▎ | 2001/8000 [45:05<81:49:55, 49.11s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 16:12:17 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. - 25%|████████████████████████████████▋ | 2013/8000 [45:24<3:17:54, 1.98s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( - -{'loss': 0.1648, 'grad_norm': 2.4911813735961914, 'learning_rate': 7.970666666666668e-06, 'epoch': 0.25} -{'loss': 0.1162, 'grad_norm': 2.604146718978882, 'learning_rate': 7.937333333333333e-06, 'epoch': 0.26} -{'loss': 0.1135, 'grad_norm': 2.7352280616760254, 'learning_rate': 7.904000000000001e-06, 'epoch': 0.26} -{'loss': 0.1153, 'grad_norm': 2.2932169437408447, 'learning_rate': 7.870666666666667e-06, 'epoch': 0.26} -{'loss': 0.1005, 'grad_norm': 3.1734797954559326, 'learning_rate': 7.837333333333334e-06, 'epoch': 0.27} -{'loss': 0.0988, 'grad_norm': 2.4353103637695312, 'learning_rate': 7.804e-06, 'epoch': 0.27} -{'loss': 0.1028, 'grad_norm': 2.8655478954315186, 'learning_rate': 7.770666666666668e-06, 'epoch': 0.27} -{'loss': 0.1751, 'grad_norm': 3.800967216491699, 'learning_rate': 7.737333333333335e-06, 'epoch': 0.28} -{'loss': 0.1798, 'grad_norm': 4.212419509887695, 'learning_rate': 7.704000000000001e-06, 'epoch': 0.28} -{'loss': 0.199, 'grad_norm': 3.5863020420074463, 'learning_rate': 7.670666666666668e-06, 'epoch': 0.28} -{'loss': 0.1335, 'grad_norm': 3.1013996601104736, 'learning_rate': 7.637333333333334e-06, 'epoch': 0.28} -{'loss': 0.0976, 'grad_norm': 2.2462713718414307, 'learning_rate': 7.604e-06, 'epoch': 0.29} -{'loss': 0.0946, 'grad_norm': 2.9669203758239746, 'learning_rate': 7.570666666666668e-06, 'epoch': 0.29} -{'loss': 0.0935, 'grad_norm': 2.645289897918701, 'learning_rate': 7.537333333333334e-06, 'epoch': 0.29} -{'loss': 0.1045, 'grad_norm': 1.9715274572372437, 'learning_rate': 7.5040000000000005e-06, 'epoch': 0.3} -{'loss': 0.0977, 'grad_norm': 2.1423373222351074, 'learning_rate': 7.470666666666667e-06, 'epoch': 0.3} -{'loss': 0.1061, 'grad_norm': 2.029958963394165, 'learning_rate': 7.437333333333334e-06, 'epoch': 0.3} -{'loss': 0.0998, 'grad_norm': 1.972732663154602, 'learning_rate': 7.404e-06, 'epoch': 0.31} -{'loss': 0.1068, 'grad_norm': 2.2875239849090576, 'learning_rate': 7.370666666666667e-06, 'epoch': 0.31} -{'loss': 0.1168, 'grad_norm': 3.1778981685638428, 'learning_rate': 7.337333333333334e-06, 'epoch': 0.31} -{'loss': 0.1524, 'grad_norm': 3.360576868057251, 'learning_rate': 7.304000000000001e-06, 'epoch': 0.32} -{'loss': 0.1483, 'grad_norm': 3.5467047691345215, 'learning_rate': 7.270666666666667e-06, 'epoch': 0.32} -{'loss': 0.1775, 'grad_norm': 3.488696575164795, 'learning_rate': 7.237333333333334e-06, 'epoch': 0.32} -{'loss': 0.135, 'grad_norm': 2.8800296783447266, 'learning_rate': 7.204000000000001e-06, 'epoch': 0.33} -{'loss': 0.1108, 'grad_norm': 3.1020660400390625, 'learning_rate': 7.170666666666667e-06, 'epoch': 0.33} -{'loss': 0.1002, 'grad_norm': 2.1233720779418945, 'learning_rate': 7.137333333333334e-06, 'epoch': 0.33} -{'loss': 0.0941, 'grad_norm': 2.393425703048706, 'learning_rate': 7.104000000000001e-06, 'epoch': 0.33} -{'loss': 0.0959, 'grad_norm': 2.295924186706543, 'learning_rate': 7.0706666666666665e-06, 'epoch': 0.34} -{'loss': 0.1116, 'grad_norm': 1.8125039339065552, 'learning_rate': 7.037333333333334e-06, 'epoch': 0.34} -{'loss': 0.1146, 'grad_norm': 3.006834030151367, 'learning_rate': 7.004000000000001e-06, 'epoch': 0.34} -{'loss': 0.2029, 'grad_norm': 4.171006679534912, 'learning_rate': 6.970666666666667e-06, 'epoch': 0.35} -{'loss': 0.1913, 'grad_norm': 3.68646240234375, 'learning_rate': 6.937333333333334e-06, 'epoch': 0.35} -{'loss': 0.16, 'grad_norm': 3.7463300228118896, 'learning_rate': 6.904e-06, 'epoch': 0.35} -{'loss': 0.1571, 'grad_norm': 3.069136381149292, 'learning_rate': 6.8706666666666676e-06, 'epoch': 0.36} -{'loss': 0.1608, 'grad_norm': 3.17172908782959, 'learning_rate': 6.837333333333334e-06, 'epoch': 0.36} -{'loss': 0.1546, 'grad_norm': 3.1673102378845215, 'learning_rate': 6.804e-06, 'epoch': 0.36} -{'loss': 0.1282, 'grad_norm': 2.344193935394287, 'learning_rate': 6.770666666666668e-06, 'epoch': 0.37} -{'loss': 0.0979, 'grad_norm': 2.5321226119995117, 'learning_rate': 6.737333333333333e-06, 'epoch': 0.37} -{'loss': 0.1049, 'grad_norm': 2.2652363777160645, 'learning_rate': 6.7040000000000005e-06, 'epoch': 0.37} -{'loss': 0.1433, 'grad_norm': 2.7856993675231934, 'learning_rate': 6.670666666666668e-06, 'epoch': 0.38} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 16:32:16,805 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 16:32:16,805 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 16:32:24,994 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:25,165 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:26,282 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:27,776 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:29,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:30,273 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:31,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:32,739 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:33,771 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:34,884 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:35,931 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:36,990 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:38,037 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:39,057 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:39,996 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:41,081 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:42,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:42,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:43,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:44,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:45,808 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:46,811 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:47,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:48,768 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:49,750 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:50,750 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:51,749 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:52,812 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:53,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:54,901 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:55,908 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:56,978 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:58,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:32:59,089 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:00,086 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:01,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:02,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:03,217 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:04,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:05,324 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:07,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:08,515 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:09,566 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:10,508 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:11,497 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:12,472 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:13,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:14,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:15,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:16,645 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:17,663 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:18,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:19,731 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:20,661 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:21,675 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:22,678 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:23,752 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:24,673 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:25,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:26,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:27,622 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:28,693 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:29,685 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:30,756 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:31,779 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:32,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:33,577 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:34,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:35,631 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:36,667 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:37,676 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:38,674 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:39,764 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:40,785 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:41,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:42,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:43,891 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:44,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:45,810 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:46,829 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:47,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:48,759 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:49,713 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:50,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:51,773 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:52,802 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:53,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:54,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:55,881 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:56,931 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:58,078 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:33:59,112 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:00,159 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:01,125 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:02,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:03,167 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:04,200 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:05,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:06,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:07,179 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:08,207 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:09,226 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:10,272 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:11,252 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:12,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:13,418 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:14,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:15,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:16,531 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:17,519 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:18,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:19,486 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:20,519 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:21,470 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:22,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:23,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:24,377 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:25,447 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:26,486 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:27,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:28,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:29,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:30,428 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:31,462 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:32,460 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:33,428 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:34,392 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:35,367 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:36,383 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:37,360 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:38,347 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:39,325 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:34:40,266 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. - 38%|████████████████████████████████████████████████ | 3000/8000 [1:07:37<1:34:23, 1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:34:48,564 >> Saving model checkpoint to ./checkpoint-3000 -{'eval_loss': 0.2720916271209717, 'eval_wer': 13.9010937646069, 'eval_runtime': 151.7576, 'eval_samples_per_second': 13.864, 'eval_steps_per_second': 0.87, 'epoch': 0.38} -[INFO|configuration_utils.py:423] 2025-02-12 16:34:48,565 >> Configuration saved in ./checkpoint-3000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 16:34:48,566 >> Configuration saved in ./checkpoint-3000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 16:34:49,987 >> Model weights saved in ./checkpoint-3000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 16:34:49,988 >> Feature extractor saved in ./checkpoint-3000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 16:34:53,620 >> Feature extractor saved in ./preprocessor_config.json -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs}) - 38%|███████████████████████████████████████████████▋ | 3001/8000 [1:07:45<67:42:47, 48.76s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 16:34:57 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. - 38%|████████████████████████████████████████████████▏ | 3009/8000 [1:07:56<5:31:43, 3.99s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( - -{'loss': 0.1758, 'grad_norm': 4.214677810668945, 'learning_rate': 6.637333333333333e-06, 'epoch': 0.38} -{'loss': 0.1972, 'grad_norm': 4.144543647766113, 'learning_rate': 6.604000000000001e-06, 'epoch': 0.38} -{'loss': 0.1293, 'grad_norm': 2.1775295734405518, 'learning_rate': 6.570666666666667e-06, 'epoch': 0.38} -{'loss': 0.099, 'grad_norm': 2.796152353286743, 'learning_rate': 6.537333333333334e-06, 'epoch': 0.39} -{'loss': 0.0945, 'grad_norm': 2.1920204162597656, 'learning_rate': 6.504e-06, 'epoch': 0.39} -{'loss': 0.1118, 'grad_norm': 2.8689582347869873, 'learning_rate': 6.470666666666667e-06, 'epoch': 0.39} -{'loss': 0.1732, 'grad_norm': 3.580993175506592, 'learning_rate': 6.4373333333333344e-06, 'epoch': 0.4} -{'loss': 0.1581, 'grad_norm': 3.9165573120117188, 'learning_rate': 6.404e-06, 'epoch': 0.4} -{'loss': 0.1716, 'grad_norm': 3.8235292434692383, 'learning_rate': 6.370666666666667e-06, 'epoch': 0.4} -{'loss': 0.1364, 'grad_norm': 3.21138072013855, 'learning_rate': 6.3373333333333345e-06, 'epoch': 0.41} -{'loss': 0.1459, 'grad_norm': 3.925539255142212, 'learning_rate': 6.304e-06, 'epoch': 0.41} -{'loss': 0.1668, 'grad_norm': 3.062764883041382, 'learning_rate': 6.270666666666667e-06, 'epoch': 0.41} -{'loss': 0.1243, 'grad_norm': 2.8379392623901367, 'learning_rate': 6.237333333333334e-06, 'epoch': 0.42} -{'loss': 0.0979, 'grad_norm': 2.979661226272583, 'learning_rate': 6.204e-06, 'epoch': 0.42} -{'loss': 0.0848, 'grad_norm': 2.4838883876800537, 'learning_rate': 6.170666666666667e-06, 'epoch': 0.42} -{'loss': 0.0927, 'grad_norm': 2.3293073177337646, 'learning_rate': 6.137333333333334e-06, 'epoch': 0.42} -{'loss': 0.0976, 'grad_norm': 3.3497400283813477, 'learning_rate': 6.104000000000001e-06, 'epoch': 0.43} -{'loss': 0.0881, 'grad_norm': 2.0302255153656006, 'learning_rate': 6.070666666666667e-06, 'epoch': 0.43} -{'loss': 0.0828, 'grad_norm': 2.112396001815796, 'learning_rate': 6.037333333333334e-06, 'epoch': 0.43} -{'loss': 0.0983, 'grad_norm': 2.513197183609009, 'learning_rate': 6.004000000000001e-06, 'epoch': 0.44} -{'loss': 0.0929, 'grad_norm': 2.1429622173309326, 'learning_rate': 5.970666666666667e-06, 'epoch': 0.44} -{'loss': 0.0916, 'grad_norm': 2.7300236225128174, 'learning_rate': 5.937333333333334e-06, 'epoch': 0.44} -{'loss': 0.1426, 'grad_norm': 4.011541366577148, 'learning_rate': 5.9040000000000006e-06, 'epoch': 0.45} -{'loss': 0.163, 'grad_norm': 3.1994545459747314, 'learning_rate': 5.870666666666667e-06, 'epoch': 0.45} -{'loss': 0.1568, 'grad_norm': 2.98388934135437, 'learning_rate': 5.837333333333333e-06, 'epoch': 0.45} -{'loss': 0.0937, 'grad_norm': 2.4515798091888428, 'learning_rate': 5.804000000000001e-06, 'epoch': 0.46} -{'loss': 0.0861, 'grad_norm': 2.0767834186553955, 'learning_rate': 5.770666666666666e-06, 'epoch': 0.46} -{'loss': 0.0917, 'grad_norm': 2.601104974746704, 'learning_rate': 5.7373333333333335e-06, 'epoch': 0.46} -{'loss': 0.1022, 'grad_norm': 2.593489408493042, 'learning_rate': 5.704000000000001e-06, 'epoch': 0.47} -{'loss': 0.1304, 'grad_norm': 3.5832834243774414, 'learning_rate': 5.670666666666668e-06, 'epoch': 0.47} -{'loss': 0.1634, 'grad_norm': 3.4403560161590576, 'learning_rate': 5.637333333333334e-06, 'epoch': 0.47} -{'loss': 0.1683, 'grad_norm': 3.6842737197875977, 'learning_rate': 5.604000000000001e-06, 'epoch': 0.47} -{'loss': 0.1538, 'grad_norm': 3.8382315635681152, 'learning_rate': 5.570666666666667e-06, 'epoch': 0.48} -{'loss': 0.165, 'grad_norm': 4.207257270812988, 'learning_rate': 5.537333333333334e-06, 'epoch': 0.48} -{'loss': 0.1558, 'grad_norm': 2.4130444526672363, 'learning_rate': 5.504e-06, 'epoch': 0.48} -{'loss': 0.1096, 'grad_norm': 2.3981151580810547, 'learning_rate': 5.4706666666666674e-06, 'epoch': 0.49} -{'loss': 0.0937, 'grad_norm': 2.2837915420532227, 'learning_rate': 5.437333333333333e-06, 'epoch': 0.49} -{'loss': 0.0876, 'grad_norm': 2.6647775173187256, 'learning_rate': 5.404e-06, 'epoch': 0.49} -{'loss': 0.15, 'grad_norm': 3.7677643299102783, 'learning_rate': 5.3706666666666675e-06, 'epoch': 0.5} -{'loss': 0.1925, 'grad_norm': 3.542175769805908, 'learning_rate': 5.337333333333333e-06, 'epoch': 0.5} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 16:54:50,654 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 16:54:50,654 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 16:54:58,812 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 16:54:58,999 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:00,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:01,715 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:03,007 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:04,217 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:05,579 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:06,663 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:07,716 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:08,870 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:09,965 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:11,072 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:12,108 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:13,097 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:14,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:15,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:15,989 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:16,893 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:17,929 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:18,887 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:19,797 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:20,802 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:21,787 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:22,749 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:23,731 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:24,709 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:25,688 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:26,745 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:27,889 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:28,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:29,817 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:30,877 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:31,914 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:32,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:34,005 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:35,132 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:36,186 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:37,147 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:38,209 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:39,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:40,203 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:41,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:42,259 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:43,227 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:44,221 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:45,189 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:46,268 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:47,252 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:48,255 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:49,402 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:50,418 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:51,459 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:52,515 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:53,447 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:54,476 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:55,511 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:56,620 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:57,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:58,475 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:55:59,523 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:00,465 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:01,524 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:02,521 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:03,583 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:04,595 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:05,503 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:06,411 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:07,417 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:08,417 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:09,431 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:10,421 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:11,400 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:12,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:13,455 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:14,528 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:15,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:16,543 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:17,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:18,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:19,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:20,442 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:21,387 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:22,327 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:23,330 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:24,372 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:25,405 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:26,355 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:27,363 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:28,457 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:29,472 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:30,595 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:31,624 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:32,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:33,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:34,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:35,646 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:36,674 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:37,687 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:38,697 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:39,660 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:40,685 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:41,723 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:42,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:43,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:44,867 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:45,989 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:47,016 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:48,077 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:49,183 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:50,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:51,120 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:52,155 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:53,192 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:54,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:55,101 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:56,092 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:57,046 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:58,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:56:59,145 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:00,139 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:01,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:02,173 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:03,111 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:04,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:05,210 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:06,183 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:07,142 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:08,122 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:09,142 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:10,120 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:11,120 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:12,097 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 16:57:13,020 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. - 50%|████████████████████████████████████████████████████████████████ | 4000/8000 [1:30:10<1:15:16, 1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:57:21,319 >> Saving model checkpoint to ./checkpoint-4000 -{'eval_loss': 0.25648659467697144, 'eval_wer': 12.7372160418809, 'eval_runtime': 150.6646, 'eval_samples_per_second': 13.965, 'eval_steps_per_second': 0.876, 'epoch': 0.5} -[INFO|configuration_utils.py:423] 2025-02-12 16:57:21,320 >> Configuration saved in ./checkpoint-4000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 16:57:21,321 >> Configuration saved in ./checkpoint-4000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 16:57:22,731 >> Model weights saved in ./checkpoint-4000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 16:57:22,732 >> Feature extractor saved in ./checkpoint-4000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 16:57:26,282 >> Feature extractor saved in ./preprocessor_config.json -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs}) - 50%|███████████████████████████████████████████████████████████████▌ | 4001/8000 [1:30:19<54:04:14, 48.68s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 16:57:30 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. - 50%|████████████████████████████████████████████████████████████████▏ | 4008/8000 [1:30:28<5:43:05, 5.16s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( - 57%|█████████████████████████████████████████████████████████████████████████▌ | 4600/8000 [1:43:49<1:03:53, 1.13s/it]'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 415cf487-aa47-4982-8de5-bb40b3cb3a69)')' thrown while requesting GET https://huggingface.co/datasets/asierhv/composite_corpus_eu_v2.1/resolve/2c2153d993ce951528b5b62eb207632c6d267c29/data/train-00018-of-00150.parquet -{'loss': 0.1434, 'grad_norm': 2.5672571659088135, 'learning_rate': 5.304e-06, 'epoch': 0.5} -{'loss': 0.2075, 'grad_norm': 4.591808319091797, 'learning_rate': 5.270666666666668e-06, 'epoch': 0.51} -{'loss': 0.1478, 'grad_norm': 3.485185146331787, 'learning_rate': 5.237333333333334e-06, 'epoch': 0.51} -{'loss': 0.1383, 'grad_norm': 2.5995991230010986, 'learning_rate': 5.2040000000000005e-06, 'epoch': 0.51} -{'loss': 0.0959, 'grad_norm': 2.4682819843292236, 'learning_rate': 5.170666666666667e-06, 'epoch': 0.52} -{'loss': 0.0857, 'grad_norm': 2.436518669128418, 'learning_rate': 5.137333333333334e-06, 'epoch': 0.52} -{'loss': 0.0862, 'grad_norm': 2.0344107151031494, 'learning_rate': 5.104e-06, 'epoch': 0.52} -{'loss': 0.0808, 'grad_norm': 1.6771937608718872, 'learning_rate': 5.070666666666667e-06, 'epoch': 0.53} -{'loss': 0.0872, 'grad_norm': 1.7831439971923828, 'learning_rate': 5.037333333333334e-06, 'epoch': 0.53} -{'loss': 0.0832, 'grad_norm': 2.228795051574707, 'learning_rate': 5.004e-06, 'epoch': 0.53} -{'loss': 0.0927, 'grad_norm': 3.1402647495269775, 'learning_rate': 4.970666666666667e-06, 'epoch': 0.53} -{'loss': 0.1477, 'grad_norm': 3.662506580352783, 'learning_rate': 4.937333333333334e-06, 'epoch': 0.54} -{'loss': 0.1262, 'grad_norm': 2.865934371948242, 'learning_rate': 4.904000000000001e-06, 'epoch': 0.54} -{'loss': 0.1329, 'grad_norm': 3.2233200073242188, 'learning_rate': 4.870666666666667e-06, 'epoch': 0.54} -{'loss': 0.0795, 'grad_norm': 2.093703269958496, 'learning_rate': 4.837333333333334e-06, 'epoch': 0.55} -{'loss': 0.0715, 'grad_norm': 1.7601807117462158, 'learning_rate': 4.804e-06, 'epoch': 0.55} -{'loss': 0.0797, 'grad_norm': 2.1606643199920654, 'learning_rate': 4.770666666666667e-06, 'epoch': 0.55} -{'loss': 0.0883, 'grad_norm': 2.565343141555786, 'learning_rate': 4.737333333333334e-06, 'epoch': 0.56} -{'loss': 0.0965, 'grad_norm': 2.062619924545288, 'learning_rate': 4.704e-06, 'epoch': 0.56} -{'loss': 0.0891, 'grad_norm': 2.2219879627227783, 'learning_rate': 4.6706666666666675e-06, 'epoch': 0.56} -{'loss': 0.1147, 'grad_norm': 2.857029676437378, 'learning_rate': 4.637333333333334e-06, 'epoch': 0.57} -{'loss': 0.144, 'grad_norm': 3.090247392654419, 'learning_rate': 4.604e-06, 'epoch': 0.57} -{'loss': 0.1451, 'grad_norm': 3.8906264305114746, 'learning_rate': 4.570666666666667e-06, 'epoch': 0.57} -{'loss': 0.1475, 'grad_norm': 3.7733590602874756, 'learning_rate': 4.537333333333334e-06, 'epoch': 0.57} -02/12/2025 17:11:00 - WARNING - huggingface_hub.utils._http - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 415cf487-aa47-4982-8de5-bb40b3cb3a69)')' thrown while requesting GET https://huggingface.co/datasets/asierhv/composite_corpus_eu_v2.1/resolve/2c2153d993ce951528b5b62eb207632c6d267c29/data/train-00018-of-00150.parquet -Retrying in 1s [Retry 1/5]. -02/12/2025 17:11:00 - WARNING - huggingface_hub.utils._http - Retrying in 1s [Retry 1/5]. - 62%|█████████████████████████████████████████████████████████████████████████████████▎ | 5000/8000 [1:52:57<58:08, 1.16s/it][INFO|trainer.py:4176] 2025-02-12 17:20:08,509 >> -{'loss': 0.1509, 'grad_norm': 3.379163980484009, 'learning_rate': 4.504e-06, 'epoch': 0.58} -{'loss': 0.1444, 'grad_norm': 3.4210824966430664, 'learning_rate': 4.470666666666667e-06, 'epoch': 0.58} -{'loss': 0.1295, 'grad_norm': 3.7809910774230957, 'learning_rate': 4.437333333333333e-06, 'epoch': 0.58} -{'loss': 0.1158, 'grad_norm': 2.537574052810669, 'learning_rate': 4.4040000000000005e-06, 'epoch': 0.59} -{'loss': 0.1249, 'grad_norm': 3.482285261154175, 'learning_rate': 4.370666666666667e-06, 'epoch': 0.59} -{'loss': 0.1238, 'grad_norm': 3.0114011764526367, 'learning_rate': 4.337333333333334e-06, 'epoch': 0.59} -{'loss': 0.0888, 'grad_norm': 2.117215394973755, 'learning_rate': 4.304000000000001e-06, 'epoch': 0.6} -{'loss': 0.0972, 'grad_norm': 2.0158379077911377, 'learning_rate': 4.270666666666667e-06, 'epoch': 0.6} -{'loss': 0.0793, 'grad_norm': 2.5208640098571777, 'learning_rate': 4.2373333333333335e-06, 'epoch': 0.6} -{'loss': 0.1035, 'grad_norm': 2.820002555847168, 'learning_rate': 4.204e-06, 'epoch': 0.61} -{'loss': 0.1128, 'grad_norm': 3.1144282817840576, 'learning_rate': 4.170666666666667e-06, 'epoch': 0.61} -{'loss': 0.1217, 'grad_norm': 3.1345527172088623, 'learning_rate': 4.137333333333334e-06, 'epoch': 0.61} -{'loss': 0.1061, 'grad_norm': 2.2702696323394775, 'learning_rate': 4.104e-06, 'epoch': 0.62} -{'loss': 0.0919, 'grad_norm': 2.714102268218994, 'learning_rate': 4.072e-06, 'epoch': 0.62} -{'loss': 0.0855, 'grad_norm': 2.448854923248291, 'learning_rate': 4.0386666666666666e-06, 'epoch': 0.62} -{'loss': 0.0818, 'grad_norm': 2.9392127990722656, 'learning_rate': 4.005333333333334e-06, 'epoch': 0.62} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 17:20:08,509 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 17:20:08,509 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 17:20:24,466 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:24,644 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:25,696 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:27,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:28,226 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:29,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:30,337 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:31,421 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:32,606 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:33,725 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:34,928 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:36,023 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:37,063 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:38,060 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:38,972 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:40,096 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:41,013 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:41,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:42,954 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:43,909 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:44,822 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:45,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:46,805 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:47,791 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:48,772 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:49,751 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:50,777 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:51,848 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:53,018 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:53,949 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:54,953 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:56,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:57,050 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:58,105 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:20:59,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:00,186 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:01,253 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:02,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:03,298 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:04,334 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:06,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:07,528 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:08,547 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:09,474 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:10,457 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:11,438 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:12,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:13,546 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:14,512 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:15,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:16,624 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:17,677 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:18,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:19,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:20,847 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:21,908 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:23,107 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:24,023 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:24,971 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:26,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:26,981 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:28,042 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:29,030 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:30,075 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:31,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:32,046 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:32,943 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:33,966 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:34,964 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:35,964 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:36,954 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:37,941 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:39,031 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:40,029 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:41,103 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:42,101 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:43,121 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:44,058 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:45,056 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:46,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:47,057 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:48,025 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:48,993 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:50,011 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:51,071 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:52,108 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:53,099 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:54,122 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:55,245 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:56,295 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:57,450 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:58,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:21:59,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:00,457 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:01,456 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:02,577 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:03,618 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:04,668 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:05,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:06,874 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:07,937 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:08,961 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:10,020 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:10,991 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:12,065 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:13,172 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:14,180 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:15,210 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:16,291 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:17,283 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:18,236 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:19,270 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:20,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:21,261 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:22,220 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:23,229 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:24,175 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:25,245 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:26,288 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:27,280 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:28,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:29,282 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:30,210 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:31,251 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:32,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:33,232 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:34,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:35,172 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:36,205 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:37,205 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:38,228 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:39,232 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:22:40,181 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. - 62%|█████████████████████████████████████████████████████████████████████████████████▎ | 5000/8000 [1:55:37<58:08, 1.16s/it][INFO|trainer.py:3860] 2025-02-12 17:22:48,522 >> Saving model checkpoint to ./checkpoint-5000 -{'eval_loss': 0.2562941014766693, 'eval_wer': 11.942600729176405, 'eval_runtime': 160.0125, 'eval_samples_per_second': 13.149, 'eval_steps_per_second': 0.825, 'epoch': 0.62} -[INFO|configuration_utils.py:423] 2025-02-12 17:22:48,523 >> Configuration saved in ./checkpoint-5000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 17:22:48,524 >> Configuration saved in ./checkpoint-5000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 17:22:49,924 >> Model weights saved in ./checkpoint-5000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 17:22:49,926 >> Feature extractor saved in ./checkpoint-5000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 17:22:53,468 >> Feature extractor saved in ./preprocessor_config.json -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs}) - 63%|███████████████████████████████████████████████████████████████████████████████▍ | 5001/8000 [1:55:46<42:48:38, 51.39s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 17:22:57 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. - 63%|████████████████████████████████████████████████████████████████████████████████ | 5006/8000 [1:55:53<8:05:45, 9.73s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( - -{'loss': 0.1203, 'grad_norm': 2.4964210987091064, 'learning_rate': 3.972e-06, 'epoch': 0.63} -{'loss': 0.111, 'grad_norm': 3.330078125, 'learning_rate': 3.938666666666667e-06, 'epoch': 0.63} -{'loss': 0.164, 'grad_norm': 3.6872191429138184, 'learning_rate': 3.905333333333334e-06, 'epoch': 0.63} -{'loss': 0.1515, 'grad_norm': 3.728769063949585, 'learning_rate': 3.872e-06, 'epoch': 0.64} -{'loss': 0.1334, 'grad_norm': 3.4183156490325928, 'learning_rate': 3.838666666666667e-06, 'epoch': 0.64} -{'loss': 0.134, 'grad_norm': 3.4580440521240234, 'learning_rate': 3.8053333333333336e-06, 'epoch': 0.64} -{'loss': 0.1088, 'grad_norm': 2.2719855308532715, 'learning_rate': 3.772e-06, 'epoch': 0.65} -{'loss': 0.0724, 'grad_norm': 2.3186910152435303, 'learning_rate': 3.7386666666666673e-06, 'epoch': 0.65} -{'loss': 0.0759, 'grad_norm': 1.8175565004348755, 'learning_rate': 3.7053333333333337e-06, 'epoch': 0.65} -{'loss': 0.0813, 'grad_norm': 2.0874826908111572, 'learning_rate': 3.6720000000000006e-06, 'epoch': 0.66} -{'loss': 0.0824, 'grad_norm': 1.9950120449066162, 'learning_rate': 3.638666666666667e-06, 'epoch': 0.66} -{'loss': 0.0835, 'grad_norm': 2.6349194049835205, 'learning_rate': 3.6053333333333334e-06, 'epoch': 0.66} -{'loss': 0.0823, 'grad_norm': 2.7667415142059326, 'learning_rate': 3.5720000000000003e-06, 'epoch': 0.67} -{'loss': 0.1077, 'grad_norm': 3.617748260498047, 'learning_rate': 3.538666666666667e-06, 'epoch': 0.67} -{'loss': 0.1268, 'grad_norm': 3.2603073120117188, 'learning_rate': 3.5053333333333335e-06, 'epoch': 0.67} -{'loss': 0.1206, 'grad_norm': 2.9681355953216553, 'learning_rate': 3.4720000000000004e-06, 'epoch': 0.68} -{'loss': 0.1279, 'grad_norm': 4.156548500061035, 'learning_rate': 3.438666666666667e-06, 'epoch': 0.68} -{'loss': 0.1177, 'grad_norm': 3.2013888359069824, 'learning_rate': 3.4053333333333337e-06, 'epoch': 0.68} -{'loss': 0.0946, 'grad_norm': 3.299403190612793, 'learning_rate': 3.372e-06, 'epoch': 0.68} -{'loss': 0.0944, 'grad_norm': 2.39630389213562, 'learning_rate': 3.338666666666667e-06, 'epoch': 0.69} -{'loss': 0.1149, 'grad_norm': 3.7624928951263428, 'learning_rate': 3.3053333333333338e-06, 'epoch': 0.69} -{'loss': 0.1373, 'grad_norm': 3.3170886039733887, 'learning_rate': 3.272e-06, 'epoch': 0.69} -{'loss': 0.1056, 'grad_norm': 2.2296531200408936, 'learning_rate': 3.238666666666667e-06, 'epoch': 0.7} -{'loss': 0.0724, 'grad_norm': 1.8995999097824097, 'learning_rate': 3.2053333333333334e-06, 'epoch': 0.7} -{'loss': 0.0604, 'grad_norm': 2.3782520294189453, 'learning_rate': 3.172e-06, 'epoch': 0.7} -{'loss': 0.0581, 'grad_norm': 2.2558810710906982, 'learning_rate': 3.138666666666667e-06, 'epoch': 0.71} -{'loss': 0.0713, 'grad_norm': 2.4040448665618896, 'learning_rate': 3.1053333333333336e-06, 'epoch': 0.71} -{'loss': 0.0773, 'grad_norm': 2.5696732997894287, 'learning_rate': 3.072e-06, 'epoch': 0.71} -{'loss': 0.0765, 'grad_norm': 2.237166404724121, 'learning_rate': 3.038666666666667e-06, 'epoch': 0.72} -{'loss': 0.0779, 'grad_norm': 1.8783671855926514, 'learning_rate': 3.0053333333333332e-06, 'epoch': 0.72} -{'loss': 0.0751, 'grad_norm': 2.096334457397461, 'learning_rate': 2.9720000000000005e-06, 'epoch': 0.72} -{'loss': 0.0711, 'grad_norm': 2.0362164974212646, 'learning_rate': 2.938666666666667e-06, 'epoch': 0.72} -{'loss': 0.0635, 'grad_norm': 1.7136311531066895, 'learning_rate': 2.9053333333333334e-06, 'epoch': 0.73} -{'loss': 0.0698, 'grad_norm': 2.754848003387451, 'learning_rate': 2.872e-06, 'epoch': 0.73} -{'loss': 0.0741, 'grad_norm': 2.058065176010132, 'learning_rate': 2.8386666666666666e-06, 'epoch': 0.73} -{'loss': 0.0938, 'grad_norm': 3.0389583110809326, 'learning_rate': 2.805333333333334e-06, 'epoch': 0.74} -{'loss': 0.1387, 'grad_norm': 3.4811720848083496, 'learning_rate': 2.7720000000000003e-06, 'epoch': 0.74} -{'loss': 0.1283, 'grad_norm': 3.2388477325439453, 'learning_rate': 2.7386666666666667e-06, 'epoch': 0.74} -{'loss': 0.1073, 'grad_norm': 3.083925247192383, 'learning_rate': 2.7053333333333336e-06, 'epoch': 0.75} -{'loss': 0.1038, 'grad_norm': 2.6847918033599854, 'learning_rate': 2.672e-06, 'epoch': 0.75} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 17:43:07,939 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 17:43:07,939 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 17:43:22,192 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:22,365 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:23,580 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:24,998 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:26,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:27,276 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:28,426 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:29,638 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:30,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:31,992 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:33,202 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:34,361 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:35,413 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:36,412 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:37,319 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:38,399 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:39,321 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:40,237 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:41,294 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:42,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:43,182 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:44,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:45,180 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:46,166 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:47,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:48,155 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:49,156 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:50,230 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:51,385 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:52,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:53,343 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:54,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:55,475 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:56,547 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:57,541 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:58,648 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:43:59,691 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:00,661 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:01,761 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:02,783 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:03,752 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:04,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:05,767 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:06,710 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:07,708 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:08,679 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:09,757 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:10,772 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:11,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:12,871 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:13,884 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:14,922 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:15,966 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:16,906 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:17,924 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:18,944 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:20,063 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:21,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:21,957 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:23,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:23,958 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:25,029 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:26,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:27,079 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:28,098 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:29,042 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:29,943 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:30,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:32,029 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:33,081 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:34,111 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:35,117 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:36,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:37,255 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:38,335 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:39,362 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:40,399 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:41,342 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:42,334 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:43,368 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:44,346 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:45,347 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:46,329 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:47,362 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:48,419 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:49,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:50,453 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:51,480 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:52,610 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:53,644 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:54,788 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:55,817 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:56,829 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:57,839 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:58,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:44:59,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:01,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:02,055 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:03,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:04,056 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:05,098 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:06,126 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:07,194 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:08,191 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:09,283 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:10,424 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:11,448 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:12,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:13,598 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:14,587 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:15,548 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:16,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:17,638 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:18,603 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:19,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:20,634 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:21,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:22,663 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:23,696 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:24,697 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:25,694 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:26,730 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:27,669 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:28,722 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:29,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:30,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:31,713 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:32,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:33,710 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:34,712 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:35,718 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:36,698 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 17:45:37,644 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. - 75%|█████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6000/8000 [2:18:35<38:52, 1.17s/it][INFO|trainer.py:3860] 2025-02-12 17:45:46,009 >> Saving model checkpoint to ./checkpoint-6000 -{'eval_loss': 0.23902159929275513, 'eval_wer': 11.07319809292325, 'eval_runtime': 158.0693, 'eval_samples_per_second': 13.311, 'eval_steps_per_second': 0.835, 'epoch': 0.75} -[INFO|configuration_utils.py:423] 2025-02-12 17:45:46,010 >> Configuration saved in ./checkpoint-6000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 17:45:46,011 >> Configuration saved in ./checkpoint-6000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 17:45:47,414 >> Model weights saved in ./checkpoint-6000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 17:45:47,415 >> Feature extractor saved in ./checkpoint-6000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 17:45:50,918 >> Feature extractor saved in ./preprocessor_config.json -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs}) - 75%|███████████████████████████████████████████████████████████████████████████████████████████████▎ | 6002/8000 [2:18:44<19:50:45, 35.76s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 17:45:56 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. - 75%|█████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6017/8000 [2:19:02<44:13, 1.34s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( - -{'loss': 0.0987, 'grad_norm': 2.7315189838409424, 'learning_rate': 2.6386666666666673e-06, 'epoch': 0.75} -{'loss': 0.0858, 'grad_norm': 2.3389735221862793, 'learning_rate': 2.6053333333333337e-06, 'epoch': 0.76} -{'loss': 0.0764, 'grad_norm': 1.982534646987915, 'learning_rate': 2.572e-06, 'epoch': 0.76} -{'loss': 0.0731, 'grad_norm': 1.9040074348449707, 'learning_rate': 2.538666666666667e-06, 'epoch': 0.76} -{'loss': 0.0758, 'grad_norm': 2.654710054397583, 'learning_rate': 2.5053333333333334e-06, 'epoch': 0.77} -{'loss': 0.0824, 'grad_norm': 2.6400296688079834, 'learning_rate': 2.4720000000000002e-06, 'epoch': 0.77} -{'loss': 0.0822, 'grad_norm': 7.269197463989258, 'learning_rate': 2.438666666666667e-06, 'epoch': 0.77} -{'loss': 0.0818, 'grad_norm': 2.363656520843506, 'learning_rate': 2.4053333333333335e-06, 'epoch': 0.78} -{'loss': 0.0768, 'grad_norm': 2.4660115242004395, 'learning_rate': 2.3720000000000003e-06, 'epoch': 0.78} -{'loss': 0.0783, 'grad_norm': 3.3116371631622314, 'learning_rate': 2.3386666666666668e-06, 'epoch': 0.78} -{'loss': 0.0899, 'grad_norm': 2.595853090286255, 'learning_rate': 2.3053333333333336e-06, 'epoch': 0.78} -{'loss': 0.0953, 'grad_norm': 2.709597587585449, 'learning_rate': 2.2720000000000004e-06, 'epoch': 0.79} -{'loss': 0.1249, 'grad_norm': 2.4446637630462646, 'learning_rate': 2.238666666666667e-06, 'epoch': 0.79} -{'loss': 0.1171, 'grad_norm': 3.4412341117858887, 'learning_rate': 2.2053333333333333e-06, 'epoch': 0.79} -{'loss': 0.1065, 'grad_norm': 2.2719008922576904, 'learning_rate': 2.172e-06, 'epoch': 0.8} -{'loss': 0.0872, 'grad_norm': 1.9873290061950684, 'learning_rate': 2.138666666666667e-06, 'epoch': 0.8} -{'loss': 0.0765, 'grad_norm': 2.487403392791748, 'learning_rate': 2.1053333333333334e-06, 'epoch': 0.8} -{'loss': 0.0736, 'grad_norm': 2.4424736499786377, 'learning_rate': 2.0720000000000002e-06, 'epoch': 0.81} -{'loss': 0.1064, 'grad_norm': 3.1507577896118164, 'learning_rate': 2.0386666666666667e-06, 'epoch': 0.81} -{'loss': 0.0993, 'grad_norm': 2.6285648345947266, 'learning_rate': 2.0053333333333335e-06, 'epoch': 0.81} -{'loss': 0.1299, 'grad_norm': 4.1934967041015625, 'learning_rate': 1.972e-06, 'epoch': 0.82} -{'loss': 0.1195, 'grad_norm': 3.031852960586548, 'learning_rate': 1.9386666666666668e-06, 'epoch': 0.82} -{'loss': 0.1197, 'grad_norm': 2.9288837909698486, 'learning_rate': 1.9053333333333334e-06, 'epoch': 0.82} -{'loss': 0.1127, 'grad_norm': 2.890054225921631, 'learning_rate': 1.8720000000000002e-06, 'epoch': 0.82} -{'loss': 0.1155, 'grad_norm': 3.130406618118286, 'learning_rate': 1.8386666666666669e-06, 'epoch': 0.83} -{'loss': 0.1291, 'grad_norm': 2.7169485092163086, 'learning_rate': 1.8053333333333333e-06, 'epoch': 0.83} -{'loss': 0.1097, 'grad_norm': 2.7390034198760986, 'learning_rate': 1.7720000000000001e-06, 'epoch': 0.83} -{'loss': 0.1022, 'grad_norm': 2.161604166030884, 'learning_rate': 1.7386666666666668e-06, 'epoch': 0.84} -{'loss': 0.0779, 'grad_norm': 2.210451126098633, 'learning_rate': 1.7053333333333336e-06, 'epoch': 0.84} -{'loss': 0.0728, 'grad_norm': 2.426438808441162, 'learning_rate': 1.672e-06, 'epoch': 0.84} -{'loss': 0.0859, 'grad_norm': 2.8744237422943115, 'learning_rate': 1.6386666666666667e-06, 'epoch': 0.85} -{'loss': 0.1496, 'grad_norm': 2.8165483474731445, 'learning_rate': 1.6053333333333335e-06, 'epoch': 0.85} -{'loss': 0.1343, 'grad_norm': 4.0077738761901855, 'learning_rate': 1.5720000000000002e-06, 'epoch': 0.85} -{'loss': 0.1397, 'grad_norm': 3.8011586666107178, 'learning_rate': 1.538666666666667e-06, 'epoch': 0.86} -{'loss': 0.1262, 'grad_norm': 2.7379047870635986, 'learning_rate': 1.5053333333333334e-06, 'epoch': 0.86} -{'loss': 0.1188, 'grad_norm': 3.250950574874878, 'learning_rate': 1.472e-06, 'epoch': 0.86} -{'loss': 0.1103, 'grad_norm': 2.782945156097412, 'learning_rate': 1.438666666666667e-06, 'epoch': 0.87} -{'loss': 0.1147, 'grad_norm': 3.08154034614563, 'learning_rate': 1.4053333333333335e-06, 'epoch': 0.87} -{'loss': 0.1332, 'grad_norm': 3.5768070220947266, 'learning_rate': 1.372e-06, 'epoch': 0.87} -{'loss': 0.1282, 'grad_norm': 3.155341863632202, 'learning_rate': 1.3386666666666668e-06, 'epoch': 0.88} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 18:05:21,315 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 18:05:21,315 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 18:05:29,617 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:29,786 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:31,140 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:32,626 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:34,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:35,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:36,498 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:37,606 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:38,714 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:39,829 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:40,947 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:41,951 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:43,000 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:43,995 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:46,219 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:47,289 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:48,188 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:49,089 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:50,124 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:51,096 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:52,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:53,010 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:54,011 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:54,997 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:56,008 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:56,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:57,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:05:59,069 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:00,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:01,182 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:02,235 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:03,336 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:04,394 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:05,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:06,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:07,602 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:08,666 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:09,635 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:10,772 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:11,802 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:14,040 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:15,034 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:16,090 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:17,033 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:18,017 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:18,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:20,075 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:21,080 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:22,056 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:23,182 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:24,195 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:25,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:26,311 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:27,249 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:28,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:29,277 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:30,376 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:31,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:32,249 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:33,311 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:34,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:35,325 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:36,313 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:37,365 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:38,382 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:39,310 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:40,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:41,232 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:42,247 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:43,299 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:44,301 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:45,300 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:46,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:47,435 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:48,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:49,528 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:50,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:51,520 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:52,499 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:53,512 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:54,484 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:55,473 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:56,427 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:57,442 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:58,505 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:06:59,556 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:00,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:01,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:02,709 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:03,769 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:04,936 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:05,980 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:07,015 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:08,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:08,995 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:10,103 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:11,162 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:12,199 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:13,229 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:14,216 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:15,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:16,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:17,337 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:18,322 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:19,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:20,559 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:21,607 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:22,656 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:23,769 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:24,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:25,710 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:26,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:27,805 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:28,761 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:29,703 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:30,724 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:31,673 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:32,752 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:33,806 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:34,818 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:35,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:36,851 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:37,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:38,853 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:39,863 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:40,840 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:41,815 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:42,803 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:43,835 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:44,832 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:45,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:46,820 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:07:47,748 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. - 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7000/8000 [2:40:45<18:54, 1.13s/it][INFO|trainer.py:3860] 2025-02-12 18:07:56,147 >> Saving model checkpoint to ./checkpoint-7000 -{'eval_loss': 0.23438745737075806, 'eval_wer': 11.391044218005048, 'eval_runtime': 154.8314, 'eval_samples_per_second': 13.589, 'eval_steps_per_second': 0.853, 'epoch': 0.88} -[INFO|configuration_utils.py:423] 2025-02-12 18:07:56,148 >> Configuration saved in ./checkpoint-7000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 18:07:56,149 >> Configuration saved in ./checkpoint-7000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 18:07:57,530 >> Model weights saved in ./checkpoint-7000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 18:07:57,531 >> Feature extractor saved in ./checkpoint-7000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 18:08:00,757 >> Feature extractor saved in ./preprocessor_config.json -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs}) - 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7001/8000 [2:40:54<13:49:55, 49.85s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 18:08:05 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. - 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7005/8000 [2:41:00<3:37:53, 13.14s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( - -{'loss': 0.2406, 'grad_norm': 11.062019348144531, 'learning_rate': 1.308e-06, 'epoch': 0.88} -{'loss': 0.3469, 'grad_norm': 4.648179531097412, 'learning_rate': 1.2746666666666669e-06, 'epoch': 0.88} -{'loss': 0.3421, 'grad_norm': 4.388245105743408, 'learning_rate': 1.2413333333333335e-06, 'epoch': 0.88} -{'loss': 0.2847, 'grad_norm': 4.806427478790283, 'learning_rate': 1.2080000000000001e-06, 'epoch': 0.89} -{'loss': 0.1671, 'grad_norm': 3.0818049907684326, 'learning_rate': 1.1746666666666668e-06, 'epoch': 0.89} -{'loss': 0.1313, 'grad_norm': 4.117819309234619, 'learning_rate': 1.1413333333333334e-06, 'epoch': 0.89} -{'loss': 0.1177, 'grad_norm': 2.8558835983276367, 'learning_rate': 1.108e-06, 'epoch': 0.9} -{'loss': 0.0911, 'grad_norm': 3.0425021648406982, 'learning_rate': 1.0746666666666669e-06, 'epoch': 0.9} -{'loss': 0.0898, 'grad_norm': 2.6587588787078857, 'learning_rate': 1.0413333333333333e-06, 'epoch': 0.9} -{'loss': 0.0922, 'grad_norm': 1.7572664022445679, 'learning_rate': 1.0080000000000001e-06, 'epoch': 0.91} -{'loss': 0.0753, 'grad_norm': 2.00393009185791, 'learning_rate': 9.746666666666668e-07, 'epoch': 0.91} -{'loss': 0.0628, 'grad_norm': 1.845981478691101, 'learning_rate': 9.413333333333334e-07, 'epoch': 0.91} -{'loss': 0.0696, 'grad_norm': 2.008112907409668, 'learning_rate': 9.080000000000001e-07, 'epoch': 0.92} -{'loss': 0.0897, 'grad_norm': 2.837357759475708, 'learning_rate': 8.746666666666668e-07, 'epoch': 0.92} -{'loss': 0.1227, 'grad_norm': 2.4842417240142822, 'learning_rate': 8.413333333333334e-07, 'epoch': 0.92} -{'loss': 0.1012, 'grad_norm': 2.7866716384887695, 'learning_rate': 8.08e-07, 'epoch': 0.93} -{'loss': 0.1141, 'grad_norm': 2.1826930046081543, 'learning_rate': 7.746666666666668e-07, 'epoch': 0.93} -{'loss': 0.0754, 'grad_norm': 2.014090061187744, 'learning_rate': 7.413333333333333e-07, 'epoch': 0.93} -{'loss': 0.0736, 'grad_norm': 2.1539175510406494, 'learning_rate': 7.08e-07, 'epoch': 0.93} -{'loss': 0.0684, 'grad_norm': 2.712541341781616, 'learning_rate': 6.746666666666667e-07, 'epoch': 0.94} -{'loss': 0.1414, 'grad_norm': 3.281242847442627, 'learning_rate': 6.413333333333334e-07, 'epoch': 0.94} -{'loss': 0.1895, 'grad_norm': 4.088025093078613, 'learning_rate': 6.08e-07, 'epoch': 0.94} -{'loss': 0.222, 'grad_norm': 4.144560813903809, 'learning_rate': 5.746666666666667e-07, 'epoch': 0.95} -{'loss': 0.1349, 'grad_norm': 1.8468823432922363, 'learning_rate': 5.413333333333334e-07, 'epoch': 0.95} -{'loss': 0.0872, 'grad_norm': 2.5354621410369873, 'learning_rate': 5.08e-07, 'epoch': 0.95} -{'loss': 0.0725, 'grad_norm': 1.83882737159729, 'learning_rate': 4.746666666666667e-07, 'epoch': 0.96} -{'loss': 0.0988, 'grad_norm': 3.42556095123291, 'learning_rate': 4.413333333333333e-07, 'epoch': 0.96} -{'loss': 0.1166, 'grad_norm': 2.682558059692383, 'learning_rate': 4.0800000000000005e-07, 'epoch': 0.96} -{'loss': 0.1257, 'grad_norm': 3.2471797466278076, 'learning_rate': 3.7466666666666674e-07, 'epoch': 0.97} -{'loss': 0.1114, 'grad_norm': 2.4202020168304443, 'learning_rate': 3.4133333333333337e-07, 'epoch': 0.97} -{'loss': 0.0811, 'grad_norm': 2.8282711505889893, 'learning_rate': 3.0800000000000006e-07, 'epoch': 0.97} -{'loss': 0.104, 'grad_norm': 4.20676326751709, 'learning_rate': 2.746666666666667e-07, 'epoch': 0.97} -{'loss': 0.2773, 'grad_norm': 4.955998420715332, 'learning_rate': 2.413333333333333e-07, 'epoch': 0.98} -{'loss': 0.1105, 'grad_norm': 2.0168468952178955, 'learning_rate': 2.08e-07, 'epoch': 0.98} -{'loss': 0.0808, 'grad_norm': 1.6335862874984741, 'learning_rate': 1.7466666666666667e-07, 'epoch': 0.98} -{'loss': 0.0786, 'grad_norm': 2.269954204559326, 'learning_rate': 1.4133333333333333e-07, 'epoch': 0.99} -{'loss': 0.0801, 'grad_norm': 2.0813560485839844, 'learning_rate': 1.0800000000000001e-07, 'epoch': 0.99} -{'loss': 0.0687, 'grad_norm': 1.6093230247497559, 'learning_rate': 7.466666666666667e-08, 'epoch': 0.99} -{'loss': 0.0814, 'grad_norm': 1.730695366859436, 'learning_rate': 4.133333333333334e-08, 'epoch': 1.0} -02/12/2025 18:28:18 - WARNING - huggingface_hub.utils._http - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 009f3961-f00b-4456-8f3b-f5beeed73ba9)')' thrown while requesting GET https://huggingface.co/datasets/asierhv/composite_corpus_eu_v2.1/resolve/2c2153d993ce951528b5b62eb207632c6d267c29/data/train-00011-of-00150.parquet -Retrying in 1s [Retry 1/5]. -02/12/2025 18:28:18 - WARNING - huggingface_hub.utils._http - Retrying in 1s [Retry 1/5]. -100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [3:01:15<00:00, 1.12s/it][INFO|trainer.py:4176] 2025-02-12 18:28:26,590 >> -{'loss': 0.0959, 'grad_norm': 3.418311595916748, 'learning_rate': 8e-09, 'epoch': 1.0} -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 18:28:26,591 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 18:28:26,591 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 18:28:36,443 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:36,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:37,929 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:39,629 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:40,907 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:42,147 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:43,290 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:44,383 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:45,515 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:46,714 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:47,807 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:48,974 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:50,030 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:51,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:51,940 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:53,011 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:53,928 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:54,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:55,861 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:56,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:57,733 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:58,729 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:28:59,711 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:00,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:01,667 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:02,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:03,638 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:04,705 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:05,860 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:06,791 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:07,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:08,866 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:09,912 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:10,983 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:11,974 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:13,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:14,105 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:15,079 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:16,146 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:17,157 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:19,369 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:20,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:21,395 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:22,338 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:23,321 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:24,296 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:25,367 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:26,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:27,341 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:28,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:29,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:30,504 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:31,539 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:32,463 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:33,471 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:34,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:35,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:36,481 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:37,440 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:38,514 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:39,450 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:40,506 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:41,508 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:42,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:43,575 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:44,506 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:45,387 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:46,407 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:47,411 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:48,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:49,418 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:50,409 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:51,497 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:52,490 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:53,574 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:54,590 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:55,627 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:56,552 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:57,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:58,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:29:59,542 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:00,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:01,497 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:02,513 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:03,573 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:04,628 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:05,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:06,649 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:07,783 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:08,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:09,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:11,030 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:12,070 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:13,037 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:14,025 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:15,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:16,199 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:17,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:18,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:19,243 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:20,274 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:21,301 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:22,390 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:23,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:24,445 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:25,581 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:26,601 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:27,726 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:28,833 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:29,822 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:30,792 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:31,830 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:32,877 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:33,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:34,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:35,764 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:36,727 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:37,789 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:38,822 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:39,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:40,838 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:41,863 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:42,813 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:43,852 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:44,875 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:45,853 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:46,820 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:47,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:48,833 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:49,830 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:50,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:51,814 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:30:52,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [3:03:50<00:00, 1.12s/it][INFO|trainer.py:3860] 2025-02-12 18:31:01,025 >> Saving model checkpoint to ./checkpoint-8000 -{'eval_loss': 0.1835634410381317, 'eval_wer': 10.886229784051602, 'eval_runtime': 154.4338, 'eval_samples_per_second': 13.624, 'eval_steps_per_second': 0.855, 'epoch': 1.0} -[INFO|configuration_utils.py:423] 2025-02-12 18:31:01,026 >> Configuration saved in ./checkpoint-8000/config.json -[INFO|configuration_utils.py:906] 2025-02-12 18:31:01,027 >> Configuration saved in ./checkpoint-8000/generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 18:31:02,413 >> Model weights saved in ./checkpoint-8000/model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 18:31:02,414 >> Feature extractor saved in ./checkpoint-8000/preprocessor_config.json -[INFO|feature_extraction_utils.py:437] 2025-02-12 18:31:05,951 >> Feature extractor saved in ./preprocessor_config.json -[INFO|trainer.py:2621] 2025-02-12 18:31:05,974 >> - -Training completed. Do not forget to share your model on huggingface.co/models =) - - -[INFO|trainer.py:2859] 2025-02-12 18:31:05,974 >> Loading best model from ./checkpoint-8000 (score: 10.886229784051602). -[WARNING|trainer.py:2996] 2025-02-12 18:31:06,378 >> There were missing keys in the checkpoint model loaded: ['proj_out.weight']. -100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [3:03:55<00:00, 1.38s/it] -{'train_runtime': 11036.9074, 'train_samples_per_second': 23.195, 'train_steps_per_second': 0.725, 'train_loss': 0.17036041705310345, 'epoch': 1.0} -[INFO|trainer.py:4641] 2025-02-12 18:31:06,419 >> Waiting for the current checkpoint push to be finished, this might take a couple of minutes. -It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 18:31:10 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -[INFO|trainer.py:3860] 2025-02-12 18:32:59,287 >> Saving model checkpoint to ./ -[INFO|configuration_utils.py:423] 2025-02-12 18:32:59,288 >> Configuration saved in ./config.json -[INFO|configuration_utils.py:906] 2025-02-12 18:32:59,290 >> Configuration saved in ./generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 18:33:01,543 >> Model weights saved in ./model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 18:33:01,544 >> Feature extractor saved in ./preprocessor_config.json -[INFO|trainer.py:3860] 2025-02-12 18:33:01,545 >> Saving model checkpoint to ./ -[INFO|configuration_utils.py:423] 2025-02-12 18:33:01,546 >> Configuration saved in ./config.json -[INFO|configuration_utils.py:906] 2025-02-12 18:33:01,547 >> Configuration saved in ./generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 18:33:03,594 >> Model weights saved in ./model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 18:33:03,596 >> Feature extractor saved in ./preprocessor_config.json -[INFO|modelcard.py:449] 2025-02-12 18:33:03,826 >> Dropping the following result as it does not have all the necessary fields: -{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 10.886229784051602}]} -It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 18:33:07 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -run-lejyafmi.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.62M/4.62M [00:01<00:00, 3.10MB/s] -***** train metrics ***** - epoch = 1.0 - total_flos = 68804121093GF - train_loss = 0.1704 - train_runtime = 3:03:56.90 - train_samples_per_second = 23.195 - train_steps_per_second = 0.725 -02/12/2025 18:34:14 - INFO - __main__ - *** Evaluate *** -[INFO|trainer.py:4176] 2025-02-12 18:34:14,390 >> -***** Running Evaluation ***** -[INFO|trainer.py:4180] 2025-02-12 18:34:14,390 >> Num examples: Unknown -[INFO|trainer.py:4181] 2025-02-12 18:34:14,390 >> Batch size = 16 -[INFO|trainer_utils.py:837] 2025-02-12 18:34:21,770 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:21,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:23,093 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:24,451 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:25,719 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:26,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:28,007 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:29,124 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:30,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:31,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:32,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:33,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:34,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:35,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:36,700 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:37,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:38,634 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:39,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:40,537 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:41,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:42,380 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:43,354 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:44,341 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:45,314 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:46,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:47,281 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:48,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:49,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:50,458 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:51,370 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:52,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:53,396 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:54,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:55,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:56,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:57,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:58,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:34:59,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:00,681 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:01,717 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:03,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:04,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:05,993 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:06,930 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:07,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:08,880 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:09,968 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:10,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:11,921 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:13,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:14,074 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:15,109 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:16,148 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:17,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:18,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:19,129 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:20,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:21,190 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:22,160 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:23,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:24,153 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:25,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:26,238 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:27,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:28,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:29,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:30,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:31,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:32,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:33,287 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:34,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:35,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:36,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:37,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:38,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:39,530 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:40,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:41,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:42,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:43,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:44,509 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:45,496 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:46,461 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:47,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:48,535 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:49,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:50,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:51,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:52,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:53,784 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:54,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:55,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:56,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:57,957 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:35:58,926 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:00,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:01,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:02,083 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:03,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:04,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:05,141 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:06,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:07,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:08,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:09,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:10,412 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:11,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:12,466 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:13,562 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:14,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:15,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:16,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:17,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:18,568 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:19,501 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:20,504 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:21,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:22,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:23,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:24,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:25,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:26,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:27,543 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:28,586 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:29,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:30,585 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:31,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:32,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:33,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:34,531 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:35,552 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:36,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -[INFO|generation_whisper.py:1844] 2025-02-12 18:36:37,437 >> Increase max_length from 225 to 228 since input is conditioned on previous segment. -***** eval metrics ***** - epoch = 1.0 - eval_loss = 0.1836 - eval_runtime = 0:02:31.38 - eval_samples_per_second = 13.899 - eval_steps_per_second = 0.872 - eval_wer = 10.8862 -[INFO|trainer.py:3860] 2025-02-12 18:36:45,773 >> Saving model checkpoint to ./ -[INFO|configuration_utils.py:423] 2025-02-12 18:36:45,774 >> Configuration saved in ./config.json -[INFO|configuration_utils.py:906] 2025-02-12 18:36:45,775 >> Configuration saved in ./generation_config.json -[INFO|modeling_utils.py:3040] 2025-02-12 18:36:47,949 >> Model weights saved in ./model.safetensors -[INFO|feature_extraction_utils.py:437] 2025-02-12 18:36:47,950 >> Feature extractor saved in ./preprocessor_config.json -It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -02/12/2025 18:36:51 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI. - warnings.warn( -run-lejyafmi.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69M/4.69M [00:01<00:00, 3.23MB/s] diff --git a/wandb/run-20250212_152709-lejyafmi/files/requirements.txt b/wandb/run-20250212_152709-lejyafmi/files/requirements.txt deleted file mode 100644 index 93e48b00f9c3b676c687ab25917704b36ec3471d..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152709-lejyafmi/files/requirements.txt +++ /dev/null @@ -1,115 +0,0 @@ -aiosignal==1.3.2 -Markdown==3.7 -more-itertools==10.6.0 -requests==2.32.3 -sentry-sdk==2.21.0 -torchaudio==2.6.0 -charset-normalizer==3.4.1 -docker-pycreds==0.4.0 -nvidia-cusolver-cu12==11.6.1.9 -PyYAML==6.0.2 -librosa==0.10.2.post1 -soxr==0.5.0.post1 -multiprocess==0.70.16 -setuptools==75.8.0 -nvidia-cufft-cu12==11.2.1.3 -joblib==1.4.2 -pytz==2025.1 -pip==24.0 -scikit-learn==1.6.1 -certifi==2025.1.31 -jiwer==3.1.0 -regex==2024.11.6 -annotated-types==0.7.0 -grpcio==1.70.0 -msgpack==1.1.0 -mpmath==1.3.0 -nvidia-cudnn-cu12==9.1.0.70 -soundfile==0.13.1 -dill==0.3.8 -nvidia-nvtx-cu12==12.4.127 -six==1.17.0 -nvidia-cuda-cupti-cu12==12.4.127 -pyarrow==19.0.0 -nvidia-nccl-cu12==2.21.5 -psutil==6.1.1 -decorator==5.1.1 -llvmlite==0.44.0 -frozenlist==1.5.0 -pydantic==2.10.6 -networkx==3.4.2 -idna==3.10 -wandb==0.19.6 -aiohttp==3.11.12 -RapidFuzz==3.12.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -numpy==2.1.3 -tokenizers==0.21.0 -nvidia-cusparselt-cu12==0.6.2 -typing_extensions==4.12.2 -urllib3==2.3.0 -setproctitle==1.3.4 -tzdata==2025.1 -sympy==1.13.1 -pooch==1.8.2 -click==8.1.8 -pydantic_core==2.27.2 -MarkupSafe==3.0.2 -scipy==1.15.1 -accelerate==1.3.0 -tensorboard==2.19.0 -protobuf==5.29.3 -gitdb==4.0.12 -smmap==5.0.2 -absl-py==2.1.0 -tqdm==4.67.1 -yarl==1.18.3 -pycparser==2.22 -nvidia-cusparse-cu12==12.3.1.170 -attrs==25.1.0 -lazy_loader==0.4 -tensorboard-data-server==0.7.2 -threadpoolctl==3.5.0 -GitPython==3.1.44 -safetensors==0.5.2 -fsspec==2024.12.0 -nvidia-cuda-nvrtc-cu12==12.4.127 -filelock==3.17.0 -aiohappyeyeballs==2.4.6 -packaging==24.2 -datasets==3.2.1.dev0 -audioread==3.0.1 -propcache==0.2.1 -transformers==4.49.0.dev0 -nvidia-cuda-runtime-cu12==12.4.127 -cffi==1.17.1 -evaluate==0.4.3 -Werkzeug==3.1.3 -huggingface-hub==0.28.1 -Jinja2==3.1.5 -torch==2.6.0 -nvidia-curand-cu12==10.3.5.147 -xxhash==3.5.0 -platformdirs==4.3.6 -multidict==6.1.0 -nvidia-cublas-cu12==12.4.5.8 -nvidia-nvjitlink-cu12==12.4.127 -triton==3.2.0 -numba==0.61.0 -importlib_metadata==8.0.0 -platformdirs==4.2.2 -typeguard==4.3.0 -more-itertools==10.3.0 -tomli==2.0.1 -autocommand==2.2.2 -zipp==3.19.2 -typing_extensions==4.12.2 -backports.tarfile==1.2.0 -inflect==7.3.1 -jaraco.text==3.12.1 -wheel==0.43.0 -packaging==24.2 -jaraco.collections==5.1.0 -jaraco.functools==4.0.1 -jaraco.context==5.3.0 diff --git a/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json b/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json deleted file mode 100644 index 15c28b0022a20f8df79ea54c97b8c208b5b9a6cb..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", - "python": "CPython 3.12.3", - "startedAt": "2025-02-12T15:27:10.113915Z", - "args": [ - "--model_name_or_path=openai/whisper-small", - "--dataset_name=asierhv/composite_corpus_eu_v2.1", - "--language=basque", - "--train_split_name=train", - "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr", - "--model_index_name=Whisper Small Basque", - "--max_steps=8000", - "--output_dir=./", - "--per_device_train_batch_size=32", - "--per_device_eval_batch_size=16", - "--gradient_accumulation_steps=1", - "--logging_steps=25", - "--learning_rate=1e-5", - "--warmup_steps=500", - "--evaluation_strategy=steps", - "--eval_steps=1000", - "--save_strategy=steps", - "--save_steps=1000", - "--generation_max_length=225", - "--length_column_name=input_length", - "--max_duration_in_seconds=30", - "--audio_column_name=audio", - "--text_column_name=sentence", - "--freeze_feature_encoder=False", - "--report_to=tensorboard", - "--metric_for_best_model=wer", - "--greater_is_better=False", - "--load_best_model_at_end", - "--gradient_checkpointing", - "--fp16", - "--overwrite_output_dir", - "--do_train", - "--do_eval", - "--predict_with_generate", - "--do_normalize_eval", - "--streaming", - "--use_auth_token", - "--push_to_hub", - "--report_to", - "wandb", - "--run_name", - "whisper-small-eu" - ], - "program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", - "codePath": "run_speech_recognition_seq2seq_streaming.py", - "git": { - "remote": "https://huggingface.co/xezpeleta/whisper-small-eu", - "commit": "9c975864b20b4df94398a870e97cad2934253ec3" - }, - "email": "xezpeleta@gmail.com", - "root": "/home/tknika/xezpeleta/whisper/whisper-small-eu", - "host": "tknika", - "executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python", - "codePathLocal": "run_speech_recognition_seq2seq_streaming.py", - "cpu_count": 8, - "cpu_count_logical": 8, - "gpu": "NVIDIA L40-48Q", - "gpu_count": 1, - "disk": { - "/": { - "total": "525987168256", - "used": "315242278912" - } - }, - "memory": { - "total": "33654022144" - }, - "cpu": { - "count": 8, - "countLogical": 8 - }, - "gpu_nvidia": [ - { - "name": "NVIDIA L40-48Q", - "memoryTotal": "51539607552", - "cudaCores": 18176, - "architecture": "Ada" - } - ], - "cudaVersion": "12.4" -} \ No newline at end of file diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log b/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log deleted file mode 100644 index b6d594cdf89ccbf6c593c3f1737e25809a758e18..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log +++ /dev/null @@ -1,6 +0,0 @@ -{"time":"2025-02-12T15:27:09.931528058Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqvyk3k9m/port-243546.txt","pid":243546,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} -{"time":"2025-02-12T15:27:09.940481802Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":243546} -{"time":"2025-02-12T15:27:09.940461772Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38449,"Zone":""}} -{"time":"2025-02-12T15:27:10.107162928Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:41928"} -{"time":"2025-02-12T15:27:10.115855515Z","level":"INFO","msg":"handleInformInit: received","streamId":"lejyafmi","id":"127.0.0.1:41928"} -{"time":"2025-02-12T15:27:10.220025051Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"lejyafmi","id":"127.0.0.1:41928"} diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log b/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log deleted file mode 100644 index 3e76e3e81a1059aef65345804a4f269bcd9183ad..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log +++ /dev/null @@ -1,7 +0,0 @@ -{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"} -{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"} -{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"} diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug.log b/wandb/run-20250212_152709-lejyafmi/logs/debug.log deleted file mode 100644 index d6ccd57adcf2bbe370a51abbc03e1e4a2718eb85..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152709-lejyafmi/logs/debug.log +++ /dev/null @@ -1,25 +0,0 @@ -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6 -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546 -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():756] calling init triggers -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {} -config: {'_wandb': {}} -2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():789] starting backend -2025-02-12 15:27:10,107 INFO MainThread:243546 [wandb_init.py:init():793] sending inform_init request -2025-02-12 15:27:10,112 INFO MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-02-12 15:27:10,113 INFO MainThread:243546 [wandb_init.py:init():808] backend started and connected -2025-02-12 15:27:10,115 INFO MainThread:243546 [wandb_init.py:init():901] updated telemetry -2025-02-12 15:27:10,122 INFO MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout -2025-02-12 15:27:10,584 INFO MainThread:243546 [wandb_init.py:init():994] starting run threads in backend -2025-02-12 15:27:10,691 INFO MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg -2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw -2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams. -2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed. -2025-02-12 15:27:10,694 INFO MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process -2025-02-12 15:27:10,698 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None} -2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - > -2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None diff --git a/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb b/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb deleted file mode 100644 index 69feb1920a8b41f53fa3307d140365bf10af15fc..0000000000000000000000000000000000000000 --- a/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:218ae98ab28234be327e4ea9293f7b5d13580cf3d80509614063d5a55716991b -size 4685824