diff --git a/.gitattributes b/.gitattributes
index c7d9f3332a950355d5a77d85000f05e6f45435ea..eaf178ffbae746781361968e352cd8b6f9e9927d 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb filter=lfs diff=lfs merge=lfs -text
diff --git a/config.json b/config.json
index d3d5d1fea84ea23cc13794f6caf7c954e20ab1f1..30ce696c074e422fea7cf3f8ffa0b1b94ae3dec5 100644
--- a/config.json
+++ b/config.json
@@ -31,7 +31,7 @@
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.05,
-  "max_length": 448,
+  "max_length": null,
   "max_source_positions": 1500,
   "max_target_positions": 448,
   "median_filter_width": 7,
@@ -41,7 +41,7 @@
   "pad_token_id": 50257,
   "scale_embedding": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.46.0.dev0",
+  "transformers_version": "4.49.0.dev0",
   "use_cache": false,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865
diff --git a/inspect_dataset.py b/inspect_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..53e145fb2d4b19a3eadeacb715f33351d305759c
--- /dev/null
+++ b/inspect_dataset.py
@@ -0,0 +1,26 @@
+from datasets import load_dataset
+import json
+
+# Open the train split in streaming mode so examples are fetched lazily during iteration
+dataset = load_dataset("asierhv/composite_corpus_eu_v2.1", split="train", streaming=True)
+
+# Collect the first three examples from the stream
+examples = []
+for i, example in enumerate(dataset):
+    if i >= 3:  # Get first 3 examples
+        break
+    examples.append(example)
+
+# Print the structure and content
+for i, example in enumerate(examples):
+    print(f"\nExample {i+1}:")
+    for key, value in example.items():
+        if key == "audio":
+            print(f"audio keys: {value.keys()}")
+            for audio_key, audio_value in value.items():
+                if isinstance(audio_value, bytes) or isinstance(audio_value, memoryview):
+                    print(f"  {audio_key}: <binary data>")
+                else:
+                    print(f"  {audio_key}: {audio_value}")
+        else:
+            print(f"{key}: {value}")
\ No newline at end of file
diff --git a/model.safetensors b/model.safetensors
index cde5a257df7d4cde0c2bf78e3d0f8a17fa87607e..3b2bf3928da4f32e0a070a831161b13ced1c6569 100644
--- a/model.safetensors
+++ b/model.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51160e502a9753fabd3f154ffcebb40049941544eb344c4b8fc8243205072f73
+oid sha256:4d2fd3b1746a32b70ee58ee1a3c90a88042e6300b79bcf3fd6d5bfc260af06f0
 size 966995080
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1f0abbed0246e82d7416cfcdb17c194cb423cb1b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+torch>=1.7
+torchaudio
+git+https://github.com/huggingface/transformers
+git+https://github.com/huggingface/datasets
+librosa
+jiwer
+evaluate>=0.3.0
+more-itertools
+tensorboard
+accelerate>=0.26.0
+wandb>=0.19.6
diff --git a/run.sh b/run.sh
index 30f23a7547aff7c5e4c1df5f14d973d2d0953d4c..d57ed7b151d05932bc1e4068432b2fcd34018634 100644
--- a/run.sh
+++ b/run.sh
@@ -1,12 +1,12 @@
+WANDB_PROJECT=whisper-small-eu \
 python run_speech_recognition_seq2seq_streaming.py \
 	--model_name_or_path="openai/whisper-small" \
-	--dataset_name="mozilla-foundation/common_voice_17_0" \
-	--dataset_config_name="eu" \
+	--dataset_name="asierhv/composite_corpus_eu_v2.1" \
 	--language="basque" \
-	--train_split_name="train+validation" \
-	--eval_split_name="test" \
+	--train_split_name="train" \
+	--eval_split_name="dev_parl+test_parl+test_cv+test_oslr" \
 	--model_index_name="Whisper Small Basque" \
-	--max_steps="5000" \
+	--max_steps="8000" \
 	--output_dir="./" \
 	--per_device_train_batch_size="32" \
 	--per_device_eval_batch_size="16" \
@@ -21,6 +21,7 @@ python run_speech_recognition_seq2seq_streaming.py \
 	--generation_max_length="225" \
 	--length_column_name="input_length" \
 	--max_duration_in_seconds="30" \
+	--audio_column_name="audio" \
 	--text_column_name="sentence" \
 	--freeze_feature_encoder="False" \
 	--report_to="tensorboard" \
@@ -36,4 +37,6 @@ python run_speech_recognition_seq2seq_streaming.py \
 	--do_normalize_eval \
 	--streaming \
 	--use_auth_token \
-	--push_to_hub
+	--push_to_hub \
+	--report_to "wandb" \
+	--run_name "whisper-small-eu"
diff --git a/run_cv.sh b/run_cv.sh
new file mode 100644
index 0000000000000000000000000000000000000000..30f23a7547aff7c5e4c1df5f14d973d2d0953d4c
--- /dev/null
+++ b/run_cv.sh
@@ -0,0 +1,39 @@
+python run_speech_recognition_seq2seq_streaming.py \
+	--model_name_or_path="openai/whisper-small" \
+	--dataset_name="mozilla-foundation/common_voice_17_0" \
+	--dataset_config_name="eu" \
+	--language="basque" \
+	--train_split_name="train+validation" \
+	--eval_split_name="test" \
+	--model_index_name="Whisper Small Basque" \
+	--max_steps="5000" \
+	--output_dir="./" \
+	--per_device_train_batch_size="32" \
+	--per_device_eval_batch_size="16" \
+	--gradient_accumulation_steps="1" \
+	--logging_steps="25" \
+	--learning_rate="1e-5" \
+	--warmup_steps="500" \
+	--evaluation_strategy="steps" \
+	--eval_steps="1000" \
+	--save_strategy="steps" \
+	--save_steps="1000" \
+	--generation_max_length="225" \
+	--length_column_name="input_length" \
+	--max_duration_in_seconds="30" \
+	--text_column_name="sentence" \
+	--freeze_feature_encoder="False" \
+	--report_to="tensorboard" \
+	--metric_for_best_model="wer" \
+	--greater_is_better="False" \
+	--load_best_model_at_end \
+	--gradient_checkpointing \
+	--fp16 \
+	--overwrite_output_dir \
+	--do_train \
+	--do_eval \
+	--predict_with_generate \
+	--do_normalize_eval \
+	--streaming \
+	--use_auth_token \
+	--push_to_hub
diff --git a/run_speech_recognition_seq2seq_streaming.py b/run_speech_recognition_seq2seq_streaming.py
index f9d68b43cc1bc4d2da64cfc69d393e0ee9ad3dda..bc1eac2ea72d875cb25d5d183284bfad0fbcdd27 100644
--- a/run_speech_recognition_seq2seq_streaming.py
+++ b/run_speech_recognition_seq2seq_streaming.py
@@ -25,6 +25,7 @@ import os
 import sys
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Union
+import numpy
 
 import datasets
 import torch
@@ -265,25 +266,58 @@ class DataCollatorSpeechSeq2SeqWithPadding:
         return batch
 
 
-def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
+def load_maybe_streaming_dataset(dataset_name, dataset_config_name, data_args, split="train", streaming=True, **kwargs):
     """
-    Utility function to load a dataset in streaming mode. For datasets with multiple splits,
-    each split is loaded individually and then splits combined by taking alternating examples from
-    each (interleaving).
+    Utility function to load a dataset, optionally in streaming mode; splits joined by "+" are loaded one by one and interleaved.
     """
+    logger.info(f"Loading dataset {dataset_name} split {split} (streaming={streaming})")
+    
+    def load_single_split(split_name):
+        logger.info(f"Loading split: {split_name}")
+        ds = load_dataset(
+            dataset_name, 
+            dataset_config_name, 
+            split=split_name, 
+            streaming=streaming, 
+            trust_remote_code=True, 
+            **kwargs
+        )
+        
+        # Add validation transform to ensure consistent audio format
+        def validate_example(example):
+            if not isinstance(example[data_args.audio_column_name], dict):
+                example[data_args.audio_column_name] = {
+                    'array': example[data_args.audio_column_name].array,
+                    'sampling_rate': example[data_args.audio_column_name].sampling_rate,
+                    'path': getattr(example[data_args.audio_column_name], 'path', None)
+                }
+            return example
+            
+        ds = ds.map(validate_example)
+        
+        # Log first example structure for debugging
+        try:
+            first_example = next(iter(ds))
+            logger.info(f"First example from {split_name}:")
+            logger.info(f"  Keys: {first_example.keys()}")
+            if data_args.audio_column_name in first_example:
+                audio = first_example[data_args.audio_column_name]
+                logger.info(f"  Audio type: {type(audio)}")
+                if isinstance(audio, dict):
+                    logger.info(f"  Audio keys: {audio.keys()}")
+                    logger.info(f"  Array type: {type(audio['array']) if 'array' in audio else 'missing'}")
+        except Exception as e:
+            logger.warning(f"Could not inspect first example from {split_name}: {e}")
+            
+        return ds
+    
     if "+" in split:
-        # load multiple splits separated by the `+` symbol with streaming mode
-        dataset_splits = [
-            load_dataset(dataset_name, dataset_config_name, split=split_name, streaming=streaming, trust_remote_code=True, **kwargs)
-            for split_name in split.split("+")
-        ]
-        # interleave multiple splits to form one dataset
-        interleaved_dataset = interleave_datasets(dataset_splits)
-        return interleaved_dataset
+        # Load and validate each split individually
+        dataset_splits = [load_single_split(split_name) for split_name in split.split("+")]
+        # Interleave datasets
+        return interleave_datasets(dataset_splits)
     else:
-        # load a single split *with* streaming mode
-        dataset = load_dataset(dataset_name, dataset_config_name, split=split, streaming=streaming, trust_remote_code=True, **kwargs)
-        return dataset
+        return load_single_split(split)
 
 
 def main():
@@ -356,37 +390,39 @@ def main():
         raw_datasets["train"] = load_maybe_streaming_dataset(
             data_args.dataset_name,
             data_args.dataset_config_name,
+            data_args,
             split=data_args.train_split_name,
-            # xezpeleta
-            #use_auth_token=True if model_args.use_auth_token else None,
             streaming=data_args.streaming,
         )
 
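+        # Read the column names from the first train example. NOTE(review): this appears to run only inside the do_train branch, so an eval-only run would leave raw_datasets_features undefined.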
+        train_dataset = raw_datasets["train"]
+        first_example = next(iter(train_dataset))
+        raw_datasets_features = list(first_example.keys())
+
+        if data_args.audio_column_name not in raw_datasets_features:
+            raise ValueError(
+                f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
+                "Make sure to set `--audio_column_name` to the correct audio column - one of "
+                f"{', '.join(raw_datasets_features)}."
+            )
+
+        if data_args.text_column_name not in raw_datasets_features:
+            raise ValueError(
+                f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
+                "Make sure to set `--text_column_name` to the correct text column - one of "
+                f"{', '.join(raw_datasets_features)}."
+            )
+
     if training_args.do_eval:
         raw_datasets["eval"] = load_maybe_streaming_dataset(
             data_args.dataset_name,
             data_args.dataset_config_name,
+            data_args,
             split=data_args.eval_split_name,
-            #use_auth_token=True if model_args.use_auth_token else None,
             streaming=data_args.streaming,
         )
 
-    raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())
-
-    if data_args.audio_column_name not in raw_datasets_features:
-        raise ValueError(
-            f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
-            "Make sure to set `--audio_column_name` to the correct audio column - one of "
-            f"{', '.join(raw_datasets_features)}."
-        )
-
-    if data_args.text_column_name not in raw_datasets_features:
-        raise ValueError(
-            f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
-            "Make sure to set `--text_column_name` to the correct text column - one of "
-            f"{', '.join(raw_datasets_features)}."
-        )
-
     # 5. Load pretrained model, tokenizer, and feature extractor
     #
     # Distributed training:
@@ -438,14 +474,12 @@ def main():
         tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
 
     # 6. Resample speech dataset if necessary
-    dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
-    if dataset_sampling_rate != feature_extractor.sampling_rate:
-        raw_datasets = raw_datasets.cast_column(
-            data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
-        )
+    # NOTE(review): the Audio cast_column resampling was removed; prepare_dataset forwards each clip's own sampling rate to the feature extractor and does not resample.
+    logger.info("Using feature extractor sampling rate: %d", feature_extractor.sampling_rate)
+    dataset_sampling_rate = feature_extractor.sampling_rate
 
     # 7. Preprocessing the datasets.
-    # We need to read the audio files as arrays and tokenize the targets.
+    logger.info("Starting dataset preprocessing")
     max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
     min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
     audio_column_name = data_args.audio_column_name
@@ -469,20 +503,59 @@ def main():
             else raw_datasets["eval"].select(range(data_args.max_eval_samples))
         )
 
+    # Inspect dataset before processing
+    for split, dataset in raw_datasets.items():
+        try:
+            first_example = next(iter(dataset))
+            logger.info(f"First example from {split} before processing:")
+            logger.info(f"Keys: {first_example.keys()}")
+            if audio_column_name in first_example:
+                audio_data = first_example[audio_column_name]
+                logger.info(f"Audio column type: {type(audio_data)}")
+                if isinstance(audio_data, dict):
+                    logger.info(f"Audio keys: {audio_data.keys()}")
+        except Exception as e:
+            logger.warning(f"Could not inspect first example from {split}: {e}")
+
     def prepare_dataset(batch):
-        # process audio
-        sample = batch[audio_column_name]
-        inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
-        # process audio length
-        batch[model_input_name] = inputs.get(model_input_name)[0]
-        batch["input_length"] = len(sample["array"])
-
-        # process targets
-        input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
-        if do_remove_punctuation:
-            input_str = normalizer(input_str).strip()
-        batch["labels"] = tokenizer(input_str).input_ids
-        return batch
+        try:
+            # Validate audio format
+            audio = batch[audio_column_name]
+            
+            # Load audio from bytes if needed
+            if isinstance(audio, dict) and 'bytes' in audio:
+                import io
+                import soundfile as sf
+                audio_bytes = io.BytesIO(audio['bytes'])
+                audio_array, sampling_rate = sf.read(audio_bytes)
+                audio = {'array': audio_array, 'sampling_rate': sampling_rate}
+            
+            # Process audio through feature extractor
+            inputs = feature_extractor(audio['array'], sampling_rate=audio['sampling_rate'])
+            batch["input_length"] = len(audio['array'])
+            batch[model_input_name] = inputs[model_input_name][0]
+            
+            # Process text
+            input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
+            if do_remove_punctuation:
+                input_str = normalizer(input_str).strip()
+            batch["labels"] = tokenizer(input_str).input_ids
+            
+            return batch
+            
+        except Exception as e:
+            logger.error(f"Error processing batch in prepare_dataset:")
+            logger.error(f"  Error type: {type(e).__name__}")
+            logger.error(f"  Error message: {str(e)}")
+            logger.error(f"  Batch keys: {list(batch.keys())}")
+            if audio_column_name in batch:
+                audio_data = batch[audio_column_name]
+                logger.error(f"  Audio type: {type(audio_data)}")
+                if isinstance(audio_data, dict):
+                    logger.error(f"  Audio keys: {list(audio_data.keys())}")
+                elif hasattr(audio_data, '__dict__'):
+                    logger.error(f"  Audio attributes: {dir(audio_data)}")
+            raise
 
     with training_args.main_process_first(desc="dataset map pre-processing"):
         vectorized_datasets = raw_datasets.map(
@@ -490,6 +563,16 @@ def main():
             remove_columns=raw_datasets_features,
         ).with_format("torch")
 
+        # Inspect vectorized dataset
+        for split, dataset in vectorized_datasets.items():
+            try:
+                first_example = next(iter(dataset))
+                logger.info(f"First example from {split} after processing:")
+                logger.info(f"Keys: {first_example.keys()}")
+                logger.info(f"Types: {', '.join(f'{k}: {type(v)}' for k, v in first_example.items())}")
+            except Exception as e:
+                logger.warning(f"Could not inspect first example from vectorized {split}: {e}")
+
         if training_args.do_train and data_args.streaming:
             # manually shuffle if streaming (done by the trainer for non-streaming)
             vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
@@ -551,7 +634,13 @@ def main():
     # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
     # Only required for streaming: Trainer automatically shuffles non-streaming datasets
     class ShuffleCallback(TrainerCallback):
-        def on_epoch_begin(self, args, state, control, train_dataloader, **kwargs):
+        def on_train_begin(self, args, state, control, **kwargs):
+            self.trainer = kwargs.get('trainer')
+
+        def on_epoch_begin(self, args, state, control, **kwargs):
+            if not hasattr(self, "trainer") or not hasattr(self.trainer, "train_dataloader") or self.trainer.train_dataloader is None:
+                return
+            train_dataloader = self.trainer.train_dataloader
             if isinstance(train_dataloader.dataset, IterableDatasetShard):
                 pass  # set_epoch() is handled by the Trainer
             elif isinstance(train_dataloader.dataset, IterableDataset):
@@ -563,7 +652,7 @@ def main():
         args=training_args,
         train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
         eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
-        tokenizer=feature_extractor,
+        processing_class=feature_extractor,
         data_collator=data_collator,
         compute_metrics=compute_metrics if training_args.predict_with_generate else None,
         callbacks=[ShuffleCallback()] if data_args.streaming else None,
diff --git a/run_speech_recognition_seq2seq_streaming_cv.py b/run_speech_recognition_seq2seq_streaming_cv.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6b78b89c848663ae43b218c1fcf24d30cb25769
--- /dev/null
+++ b/run_speech_recognition_seq2seq_streaming_cv.py
@@ -0,0 +1,657 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Fine-tuning the library models for sequence to sequence speech recognition
+with 🤗 Datasets' streaming mode.
+"""
+# You can also adapt this script for your own sequence to sequence speech
+# recognition task. Pointers for this are left as comments.
+
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union
+import numpy
+
+import datasets
+import torch
+from datasets import DatasetDict, IterableDatasetDict, interleave_datasets, load_dataset
+from torch.utils.data import IterableDataset
+
+import evaluate
+import transformers
+from transformers import (
+    AutoConfig,
+    AutoFeatureExtractor,
+    AutoModelForSpeechSeq2Seq,
+    AutoProcessor,
+    AutoTokenizer,
+    HfArgumentParser,
+    Seq2SeqTrainer,
+    Seq2SeqTrainingArguments,
+    TrainerCallback,
+    set_seed,
+)
+from transformers.models.whisper.english_normalizer import BasicTextNormalizer
+from transformers.trainer_pt_utils import IterableDatasetShard
+from transformers.trainer_utils import get_last_checkpoint, is_main_process
+from transformers.utils import check_min_version, send_example_telemetry
+from transformers.utils.versions import require_version
+
+
+# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
+check_min_version("4.25.0.dev0")
+
+require_version("datasets>=1.18.2", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ModelArguments:
+    """
+    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
+    """
+
+    model_name_or_path: str = field(
+        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
+    )
+    config_name: Optional[str] = field(
+        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
+    )
+    tokenizer_name: Optional[str] = field(
+        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
+    )
+    feature_extractor_name: Optional[str] = field(
+        default=None, metadata={"help": "feature extractor name or path if not the same as model_name"}
+    )
+    cache_dir: Optional[str] = field(
+        default=None,
+        metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
+    )
+    use_fast_tokenizer: bool = field(
+        default=True,
+        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
+    )
+    model_revision: str = field(
+        default="main",
+        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
+    )
+    use_auth_token: bool = field(
+        default=False,
+        metadata={
+            "help": (
+                "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
+                "with private models)."
+            )
+        },
+    )
+    freeze_feature_encoder: bool = field(
+        default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
+    )
+    freeze_encoder: bool = field(
+        default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
+    )
+    forced_decoder_ids: List[List[int]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "A list of pairs of integers which indicates a mapping from generation indices to token indices "
+                "that will be forced before sampling. For example, [[0, 123]] means the first generated token "
+                "will always be a token of index 123."
+            )
+        },
+    )
+    suppress_tokens: List[int] = field(
+        default=None, metadata={"help": "A list of tokens that will be suppressed at generation."}
+    )
+    model_index_name: str = field(default=None, metadata={"help": "Pretty name for the model card."})
+
+
+@dataclass
+class DataTrainingArguments:
+    """
+    Arguments pertaining to what data we are going to input our model for training and eval.
+    """
+
+    dataset_name: str = field(
+        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
+    )
+    dataset_config_name: Optional[str] = field(
+        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+    )
+    text_column: Optional[str] = field(
+        default=None,
+        metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
+    )
+    max_train_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of training examples to this "
+                "value if set."
+            )
+        },
+    )
+    max_eval_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+                "value if set."
+            )
+        },
+    )
+    audio_column_name: str = field(
+        default="audio",
+        metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
+    )
+    text_column_name: str = field(
+        default="text",
+        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
+    )
+    max_duration_in_seconds: float = field(
+        default=20.0,
+        metadata={
+            "help": (
+                "Truncate audio files that are longer than `max_duration_in_seconds` seconds to"
+                " 'max_duration_in_seconds`"
+            )
+        },
+    )
+    min_duration_in_seconds: float = field(
+        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
+    )
+    train_split_name: str = field(
+        default="train",
+        metadata={
+            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
+        },
+    )
+    eval_split_name: str = field(
+        default="test",
+        metadata={
+            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
+        },
+    )
+    do_lower_case: bool = field(
+        default=False,
+        metadata={"help": "Whether the target text should be lower cased."},
+    )
+    do_remove_punctuation: bool = field(
+        default=False,
+        metadata={"help": "Whether the target text should be striped of punctuation."},
+    )
+    do_normalize_eval: bool = field(
+        default=True,
+        metadata={"help": "Whether to normalise the references and predictions in the eval WER calculation."},
+    )
+    language: str = field(
+        default=None,
+        metadata={
+            "help": (
+                "Language for multilingual fine-tuning. This argument should be set for multilingual fine-tuning "
+                "only. For English speech recognition, it should be set to `None`."
+            )
+        },
+    )
+    task: str = field(
+        default="transcribe",
+        metadata={"help": "Task, either `transcribe` for speech recognition or `translate` for speech translation."},
+    )
+    shuffle_buffer_size: Optional[int] = field(
+        default=500,
+        metadata={
+            "help": (
+                "The number of streamed examples to download before shuffling them. The large the buffer, "
+                "the closer it is to real offline shuffling."
+            )
+        },
+    )
+    streaming: bool = field(
+        default=True,
+        metadata={"help": "Whether to use streaming mode to load and pre-process the data."},
+    )
+
+
+@dataclass
+class DataCollatorSpeechSeq2SeqWithPadding:
+    """
+    Data collator that will dynamically pad the inputs received.
+    Args:
+        processor ([`WhisperProcessor`])
+            The processor used for processing the data.
+        decoder_start_token_id (`int`)
+            The begin-of-sentence of the decoder.
+    """
+
+    processor: Any
+    decoder_start_token_id: int
+
+    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+        # split inputs and labels since they have to be of different lengths and need
+        # different padding methods
+        model_input_name = self.processor.model_input_names[0]
+        input_features = [{model_input_name: feature[model_input_name]} for feature in features]
+        label_features = [{"input_ids": feature["labels"]} for feature in features]
+
+        batch = self.processor.feature_extractor.pad(input_features, return_tensors="pt")
+
+        labels_batch = self.processor.tokenizer.pad(label_features, return_tensors="pt")
+
+        # replace padding with -100 to ignore loss correctly
+        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
+
+        # if the bos token was prepended in the previous tokenization step,
+        # cut it here, since it is added again later anyway
+        if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item():
+            labels = labels[:, 1:]
+
+        batch["labels"] = labels
+
+        return batch
+
+
+def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
+    """
+    Utility function to load a dataset in streaming mode. For datasets with multiple splits,
+    each split is loaded individually and then splits combined by taking alternating examples from
+    each (interleaving).
+    """
+    if ("+" in split):
+        # load multiple splits separated by the `+` symbol with streaming mode
+        dataset_splits = [
+            load_dataset(dataset_name, dataset_config_name, split=split_name, streaming=streaming, trust_remote_code=True, **kwargs)
+            for split_name in split.split("+")
+        ]
+        # interleave multiple splits to form one dataset
+        interleaved_dataset = interleave_datasets(dataset_splits)
+        return interleaved_dataset
+    else:
+        # load a single split *with* streaming mode
+        dataset = load_dataset(dataset_name, dataset_config_name, split=split, streaming=streaming, trust_remote_code=True, **kwargs)
+        return dataset
+
+
+def main():
+    """
+    Fine-tune and/or evaluate a speech sequence-to-sequence model, optionally on streamed data.
+
+    Steps: parse the arguments, set up logging, detect a resumable checkpoint, load the dataset(s),
+    load config / feature extractor / tokenizer / model, preprocess audio and transcripts, run
+    `Seq2SeqTrainer` training and/or evaluation, then write the model card or push to the Hub.
+
+    Returns:
+        dict: `results`, which is currently never populated and therefore always empty.
+
+    Raises:
+        ValueError: if the output directory is non-empty and cannot be resumed from, if the audio or
+            text column is missing from the dataset, if `config.decoder_start_token_id` is unset, or
+            if an audio sample carries no decoded waveform.
+    """
+    # 1. Parse input arguments
+    # See all possible arguments in src/transformers/training_args.py
+    # or by passing the --help flag to this script.
+    # We now keep distinct sets of args, for a cleaner separation of concerns.
+    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments))
+
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+        # If we pass only one argument to the script and it's the path to a json file,
+        # let's parse it to get our arguments.
+        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+    else:
+        model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
+    # information sent is the one passed as arguments along with your Python/PyTorch versions.
+    send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)
+
+    # 2. Setup logging
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+    log_level = training_args.get_process_log_level()
+    logger.setLevel(log_level)
+    datasets.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.enable_default_handler()
+    transformers.utils.logging.enable_explicit_format()
+
+    # Overrides the level set above: INFO on the main process, WARN on the others.
+    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
+
+    # Log on each process the small summary:
+    logger.warning(
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+    )
+
+    # Set the verbosity to info of the Transformers logger (on main process only):
+    if is_main_process(training_args.local_rank):
+        transformers.utils.logging.set_verbosity_info()
+    logger.info("Training/evaluation parameters %s", training_args)
+
+    # 3. Detecting last checkpoint and eventually continue from last checkpoint
+    last_checkpoint = None
+    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
+        last_checkpoint = get_last_checkpoint(training_args.output_dir)
+        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
+            raise ValueError(
+                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+                "Use --overwrite_output_dir to overcome."
+            )
+        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
+            logger.info(
+                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
+                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
+            )
+
+    # Set seed before initializing model.
+    set_seed(training_args.seed)
+
+    # 4. Load dataset
+    raw_datasets = IterableDatasetDict() if data_args.streaming else DatasetDict()
+
+    if training_args.do_train:
+        raw_datasets["train"] = load_maybe_streaming_dataset(
+            data_args.dataset_name,
+            data_args.dataset_config_name,
+            split=data_args.train_split_name,
+            streaming=data_args.streaming,
+        )
+
+    if training_args.do_eval:
+        raw_datasets["eval"] = load_maybe_streaming_dataset(
+            data_args.dataset_name,
+            data_args.dataset_config_name,
+            split=data_args.eval_split_name,
+            streaming=data_args.streaming,
+        )
+
+    # Column names are taken from the first loaded split.
+    raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())
+
+    if data_args.audio_column_name not in raw_datasets_features:
+        raise ValueError(
+            f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
+            "Make sure to set `--audio_column_name` to the correct audio column - one of "
+            f"{', '.join(raw_datasets_features)}."
+        )
+
+    if data_args.text_column_name not in raw_datasets_features:
+        raise ValueError(
+            f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
+            "Make sure to set `--text_column_name` to the correct text column - one of "
+            f"{', '.join(raw_datasets_features)}."
+        )
+
+    # 5. Load pretrained model, tokenizer, and feature extractor
+    #
+    # Distributed training:
+    # The .from_pretrained methods guarantee that only one local process can concurrently
+    # download model & vocab.
+    config = AutoConfig.from_pretrained(
+        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+
+    config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens})
+
+    if training_args.gradient_checkpointing:
+        config.update({"use_cache": False})
+
+    feature_extractor = AutoFeatureExtractor.from_pretrained(
+        model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        use_fast=model_args.use_fast_tokenizer,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        model_args.model_name_or_path,
+        config=config,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+
+    if model.config.decoder_start_token_id is None:
+        raise ValueError("Make sure that `config.decoder_start_token_id` is correctly defined")
+
+    if model_args.freeze_feature_encoder:
+        model.freeze_feature_encoder()
+
+    if model_args.freeze_encoder:
+        model.freeze_encoder()
+
+    if data_args.language is not None:
+        # We only need to set the task id when the language is specified (i.e. in a multilingual setting)
+        tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
+
+    # 6. Resample speech dataset if necessary
+    dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
+    if dataset_sampling_rate != feature_extractor.sampling_rate:
+        raw_datasets = raw_datasets.cast_column(
+            data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
+        )
+
+    # 7. Preprocessing the datasets.
+    # We need to read the audio files as arrays and tokenize the targets.
+    max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
+    min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
+    audio_column_name = data_args.audio_column_name
+    text_column_name = data_args.text_column_name
+    model_input_name = feature_extractor.model_input_names[0]
+    do_lower_case = data_args.do_lower_case
+    do_remove_punctuation = data_args.do_remove_punctuation
+    normalizer = BasicTextNormalizer()  # 'official' text normalizer from OpenAI
+
+    if data_args.max_train_samples is not None:
+        raw_datasets["train"] = (
+            raw_datasets["train"].take(data_args.max_train_samples)
+            if data_args.streaming
+            else raw_datasets["train"].select(range(data_args.max_train_samples))
+        )
+
+    if data_args.max_eval_samples is not None:
+        raw_datasets["eval"] = (
+            raw_datasets["eval"].take(data_args.max_eval_samples)
+            if data_args.streaming
+            else raw_datasets["eval"].select(range(data_args.max_eval_samples))
+        )
+
+    def prepare_dataset(batch):
+        """Turn one raw example into model inputs, its length in samples and tokenized labels."""
+        # process audio
+        sample = batch[audio_column_name]
+
+        # Handle different audio formats - some datasets provide dicts, others provide raw arrays
+        if isinstance(sample, dict):
+            if sample.get("array") is None:
+                # A bare file path cannot be fed to the feature extractor, which expects the waveform.
+                raise ValueError(
+                    "Unsupported audio format. Sample must contain a decoded 'array' "
+                    f"(cast the column to `datasets.Audio` to decode files). Got {sample.keys()}"
+                )
+            audio_array = sample["array"]
+            sampling_rate = sample.get("sampling_rate", feature_extractor.sampling_rate)
+        else:
+            # Assume it's a raw waveform already at the feature extractor's sampling rate
+            audio_array = sample
+            sampling_rate = feature_extractor.sampling_rate
+
+        inputs = feature_extractor(audio_array, sampling_rate=sampling_rate)
+        # Keep the extracted features: the raw audio column is dropped by `remove_columns` after the map,
+        # so this is the only place the model inputs can come from.
+        # NOTE(review): assumes the data collator reads `model_input_name` from each example - confirm.
+        batch[model_input_name] = inputs.get(model_input_name)[0]
+
+        # process audio length (in samples; compared against min/max_input_length below)
+        batch["input_length"] = len(audio_array)
+
+        # process targets
+        input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
+        if do_remove_punctuation:
+            input_str = normalizer(input_str).strip()
+        batch["labels"] = tokenizer(input_str).input_ids
+        return batch
+
+    with training_args.main_process_first(desc="dataset map pre-processing"):
+        vectorized_datasets = raw_datasets.map(
+            prepare_dataset,
+            remove_columns=raw_datasets_features,
+        ).with_format("torch")
+
+        if training_args.do_train and data_args.streaming:
+            # manually shuffle if streaming (done by the trainer for non-streaming)
+            vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
+                buffer_size=data_args.shuffle_buffer_size,
+                seed=training_args.seed,
+            )
+
+    # filter training data that is shorter than min_input_length or longer than
+    # max_input_length
+    def is_audio_in_length_range(length):
+        return min_input_length < length < max_input_length
+
+    if training_args.do_train:
+        vectorized_datasets["train"] = vectorized_datasets["train"].filter(
+            is_audio_in_length_range,
+            input_columns=["input_length"],
+        )
+
+    # 8. Load Metric
+    metric = evaluate.load("wer")
+    do_normalize_eval = data_args.do_normalize_eval
+
+    def compute_metrics(pred):
+        """Decode predictions and references and return the word error rate as a percentage."""
+        pred_ids = pred.predictions
+
+        # -100 marks ignored label positions; map them back to the pad token so they can be decoded
+        pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id
+
+        pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
+        # we do not want to group tokens when computing the metrics
+        label_str = tokenizer.batch_decode(pred.label_ids, skip_special_tokens=True)
+
+        if do_normalize_eval:
+            pred_str = [normalizer(text) for text in pred_str]
+            label_str = [normalizer(text) for text in label_str]
+            # filtering step to only evaluate the samples that correspond to non-zero references:
+            kept = [(hyp, ref) for hyp, ref in zip(pred_str, label_str) if len(ref) > 0]
+            pred_str = [hyp for hyp, _ in kept]
+            label_str = [ref for _, ref in kept]
+
+        wer = 100 * metric.compute(predictions=pred_str, references=label_str)
+
+        return {"wer": wer}
+
+    # 9. Create a single speech processor
+    if is_main_process(training_args.local_rank):
+        # save feature extractor, tokenizer and config
+        feature_extractor.save_pretrained(training_args.output_dir)
+        tokenizer.save_pretrained(training_args.output_dir)
+        config.save_pretrained(training_args.output_dir)
+
+    processor = AutoProcessor.from_pretrained(training_args.output_dir)
+
+    # 10. Define data collator
+    data_collator = DataCollatorSpeechSeq2SeqWithPadding(
+        processor=processor,
+        decoder_start_token_id=model.config.decoder_start_token_id,
+    )
+
+    # 11. Configure Trainer
+    # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
+    # Only required for streaming: Trainer automatically shuffles non-streaming datasets
+    class ShuffleCallback(TrainerCallback):
+        # The Trainer hands callbacks the `train_dataloader` keyword, not the trainer itself, so the
+        # dataloader is taken from the event arguments.
+        def on_epoch_begin(self, args, state, control, train_dataloader=None, **kwargs):
+            if train_dataloader is None:
+                return
+            train_dataset = train_dataloader.dataset
+            if isinstance(train_dataset, IterableDatasetShard):
+                pass  # set_epoch() is handled by the Trainer
+            elif isinstance(train_dataset, IterableDataset):
+                train_dataset.set_epoch(train_dataset._epoch + 1)
+
+    # Initialize Trainer
+    trainer = Seq2SeqTrainer(
+        model=model,
+        args=training_args,
+        train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
+        eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
+        processing_class=feature_extractor,
+        data_collator=data_collator,
+        compute_metrics=compute_metrics if training_args.predict_with_generate else None,
+        callbacks=[ShuffleCallback()] if data_args.streaming else None,
+    )
+
+    # 12. Training
+    if training_args.do_train:
+        # An explicit --resume_from_checkpoint wins over a checkpoint detected in the output directory
+        checkpoint = None
+        if training_args.resume_from_checkpoint is not None:
+            checkpoint = training_args.resume_from_checkpoint
+        elif last_checkpoint is not None:
+            checkpoint = last_checkpoint
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
+        trainer.save_model()  # Saves the feature extractor too for easy upload
+
+        metrics = train_result.metrics
+        if data_args.max_train_samples:
+            metrics["train_samples"] = data_args.max_train_samples
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
+
+    # 13. Evaluation
+    results = {}
+    if training_args.do_eval:
+        logger.info("*** Evaluate ***")
+        metrics = trainer.evaluate(
+            metric_key_prefix="eval",
+            max_length=training_args.generation_max_length,
+            num_beams=training_args.generation_num_beams,
+        )
+        if data_args.max_eval_samples:
+            metrics["eval_samples"] = data_args.max_eval_samples
+
+        trainer.log_metrics("eval", metrics)
+        trainer.save_metrics("eval", metrics)
+
+    # 14. Write Training Stats
+    kwargs = {
+        "finetuned_from": model_args.model_name_or_path,
+        "tasks": "automatic-speech-recognition",
+        "tags": "whisper-event",
+    }
+    if data_args.dataset_name is not None:
+        kwargs["dataset_tags"] = data_args.dataset_name
+        if data_args.dataset_config_name is not None:
+            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
+        else:
+            kwargs["dataset"] = data_args.dataset_name
+        # The language code is derived from the config name, so skip it when no config was given
+        if "common_voice" in data_args.dataset_name and data_args.dataset_config_name is not None:
+            kwargs["language"] = data_args.dataset_config_name.split('-')[0]
+        if model_args.model_index_name is not None:
+            kwargs["model_name"] = model_args.model_index_name
+
+    if training_args.push_to_hub:
+        trainer.push_to_hub(**kwargs)
+    else:
+        trainer.create_model_card(**kwargs)
+
+    return results
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tokenizer_config.json b/tokenizer_config.json
index d13b786c04765fb1a06492b53587752cd67665ea..3ae24e088bb83a2aaa76a3bf6e3f1cef13e78b53 100644
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -12980,6 +12980,7 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "model_max_length": 1024,
   "pad_token": "<|endoftext|>",
   "processor_class": "WhisperProcessor",
diff --git a/training_args.bin b/training_args.bin
index 65e80f3eadc5c414af974d4849def31f995fd5a1..218dfd2e685efef6a3bca12817ffe102823a68fa 100644
--- a/training_args.bin
+++ b/training_args.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e5015f6997af49dd4702cbba394870a18c74f3b62b5a4ffcc8bf3aa71cc41ee
-size 5368
+oid sha256:37d41e6c93c9164dab27b25a4957996293e07bbed9895811c22360ffbda7ebbf
+size 5432
diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..3e76e3e81a1059aef65345804a4f269bcd9183ad
--- /dev/null
+++ b/wandb/debug-internal.log
@@ -0,0 +1,7 @@
+{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
+{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
diff --git a/wandb/debug.log b/wandb/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..d6ccd57adcf2bbe370a51abbc03e1e4a2718eb85
--- /dev/null
+++ b/wandb/debug.log
@@ -0,0 +1,25 @@
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():756] calling init triggers
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():789] starting backend
+2025-02-12 15:27:10,107 INFO    MainThread:243546 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 15:27:10,112 INFO    MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 15:27:10,113 INFO    MainThread:243546 [wandb_init.py:init():808] backend started and connected
+2025-02-12 15:27:10,115 INFO    MainThread:243546 [wandb_init.py:init():901] updated telemetry
+2025-02-12 15:27:10,122 INFO    MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 15:27:10,584 INFO    MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 15:27:10,691 INFO    MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 15:27:10,694 INFO    MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 15:27:10,698 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
+2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/config.yaml b/wandb/run-20250212_121751-d4i88lzt/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..043453cafc3d9969981ef61b7a8be2e92734597b
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_12-17-27_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/output.log b/wandb/run-20250212_121751-d4i88lzt/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..47ae9b884ed0bd7b0b1e663b294089b5065b6378
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt b/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json b/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c4f87be68cf8cc40c16f58b87fc77156484b279
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T12:17:51.527114Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "297346564096"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json b/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..d6af67f7efa449508164027a6273196ce78339b0
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392}
+{"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}}
+{"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"}
+{"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..26894375e49df56758efcfe21e6d3c1198d1f1c3
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"}
+{"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"}
+{"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"}
diff --git a/wandb/run-20250212_121751-d4i88lzt/logs/debug.log b/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..9d9b0f45e81af07737809add0aa564e5a82d4e9b
--- /dev/null
+++ b/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
+2025-02-12 12:17:51,311 INFO    MainThread:223392 [wandb_init.py:init():756] calling init triggers
+2025-02-12 12:17:51,312 INFO    MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 12:17:51,312 INFO    MainThread:223392 [wandb_init.py:init():789] starting backend
+2025-02-12 12:17:51,521 INFO    MainThread:223392 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 12:17:51,526 INFO    MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 12:17:51,526 INFO    MainThread:223392 [wandb_init.py:init():808] backend started and connected
+2025-02-12 12:17:51,528 INFO    MainThread:223392 [wandb_init.py:init():901] updated telemetry
+2025-02-12 12:17:51,535 INFO    MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 12:17:51,944 INFO    MainThread:223392 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 12:17:52,049 INFO    MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 12:17:52,051 INFO    MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 12:17:52,052 INFO    MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 12:17:52,054 INFO    MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75ef87e92c00>>
+2025-02-12 12:17:52,055 INFO    MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb b/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..22a915c2f93dba32f6fb4cb19cdad8d41103ad8f
Binary files /dev/null and b/wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb differ
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml b/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0190eccc1f197e04acf36d5f4461aa7e993e6582
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_12-26-11_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/output.log b/wandb/run-20250212_122637-v3d3ouvn/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..47ae9b884ed0bd7b0b1e663b294089b5065b6378
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt b/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..09e834199f3ff4987252a9c26cf0f4e0a17ac89c
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T12:26:37.277902Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "297346666496"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..a2c451b0fdc549443d1d6b56f3138b2f0778d2da
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110}
+{"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}}
+{"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"}
+{"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..836534c70d6b5a016b47367347a114c8262f9db4
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"}
+{"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"}
+{"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"}
diff --git a/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log b/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..651a7c89fd0b1c415f299466295634e0fb5f97ae
--- /dev/null
+++ b/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:init():756] calling init triggers
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 12:26:37,062 INFO    MainThread:224110 [wandb_init.py:init():789] starting backend
+2025-02-12 12:26:37,272 INFO    MainThread:224110 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 12:26:37,277 INFO    MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 12:26:37,277 INFO    MainThread:224110 [wandb_init.py:init():808] backend started and connected
+2025-02-12 12:26:37,279 INFO    MainThread:224110 [wandb_init.py:init():901] updated telemetry
+2025-02-12 12:26:37,285 INFO    MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 12:26:37,653 INFO    MainThread:224110 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 12:26:37,764 INFO    MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 12:26:37,765 INFO    MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 12:26:37,765 INFO    MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 12:26:37,765 INFO    MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 12:26:37,766 INFO    MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 12:26:37,767 INFO    MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 12:26:37,770 INFO    MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7a5cbc15a330>>
+2025-02-12 12:26:37,770 INFO    MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb b/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..ec09d574bec753b9c13409f7b715d91402a72733
Binary files /dev/null and b/wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb differ
diff --git a/wandb/run-20250212_122854-4m048f5s/files/config.yaml b/wandb/run-20250212_122854-4m048f5s/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd2427648bdf8f30b7a5b1a74e995772423700bd
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_12-28-29_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_122854-4m048f5s/files/output.log b/wandb/run-20250212_122854-4m048f5s/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..47ae9b884ed0bd7b0b1e663b294089b5065b6378
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_122854-4m048f5s/files/requirements.txt b/wandb/run-20250212_122854-4m048f5s/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json b/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f63869dc081394dcbd07bd84335642df549da86e
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T12:28:54.528397Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "297346756608"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json b/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log b/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..0a340626ec035668304ac5b99a523d0e9b994b99
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528}
+{"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}}
+{"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"}
+{"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log b/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..12bd0e04a87c9e69aaa4351910eaad3205df4abf
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"}
+{"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"}
+{"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"}
+{"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"}
+{"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"}
+{"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"}
+{"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"}
+{"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"}
+{"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"}
+{"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"}
+{"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"}
diff --git a/wandb/run-20250212_122854-4m048f5s/logs/debug.log b/wandb/run-20250212_122854-4m048f5s/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..54eaf255ca03d83dfe297f954d06f7be79378056
--- /dev/null
+++ b/wandb/run-20250212_122854-4m048f5s/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:init():756] calling init triggers
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 12:28:54,316 INFO    MainThread:224528 [wandb_init.py:init():789] starting backend
+2025-02-12 12:28:54,521 INFO    MainThread:224528 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 12:28:54,527 INFO    MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 12:28:54,528 INFO    MainThread:224528 [wandb_init.py:init():808] backend started and connected
+2025-02-12 12:28:54,530 INFO    MainThread:224528 [wandb_init.py:init():901] updated telemetry
+2025-02-12 12:28:54,537 INFO    MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 12:28:54,883 INFO    MainThread:224528 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 12:28:54,988 INFO    MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 12:28:54,989 INFO    MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 12:28:54,989 INFO    MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 12:28:54,989 INFO    MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 12:28:54,990 INFO    MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 12:28:54,991 INFO    MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 12:28:54,995 INFO    MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x71c5f6c57cb0>>
+2025-02-12 12:28:54,995 INFO    MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb b/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..d6e2b9a990f433fbef578c9ebdfb5b9a71ba6c24
Binary files /dev/null and b/wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb differ
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml b/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fda1ff6a94e5d574407cad68313478699d3b809c
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_12-51-48_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/output.log b/wandb/run-20250212_125202-c6xjc1gs/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..47ae9b884ed0bd7b0b1e663b294089b5065b6378
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt b/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..de67d54e80183d40bb763b73a71aa662e9c26e2d
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T12:52:03.105234Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=test_parl",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777016832"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..b2a91d838478809786e569e4b108333296f1c838
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112}
+{"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}}
+{"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"}
+{"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..e92e3fbe32b1a1557ac2822b511c40cd3bec6edc
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"}
+{"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"}
+{"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"}
diff --git a/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log b/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..f7f6579dfe6ece92d42e8341a45478b9a1a5d112
--- /dev/null
+++ b/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 12:52:02,886 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:init():756] calling init triggers
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 12:52:02,887 INFO    MainThread:226112 [wandb_init.py:init():789] starting backend
+2025-02-12 12:52:03,097 INFO    MainThread:226112 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 12:52:03,104 INFO    MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 12:52:03,104 INFO    MainThread:226112 [wandb_init.py:init():808] backend started and connected
+2025-02-12 12:52:03,107 INFO    MainThread:226112 [wandb_init.py:init():901] updated telemetry
+2025-02-12 12:52:03,114 INFO    MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 12:52:03,483 INFO    MainThread:226112 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 12:52:03,566 INFO    MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 12:52:03,568 INFO    MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 12:52:03,569 INFO    MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 12:52:03,571 INFO    MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7d4830f2ddf0>>
+2025-02-12 12:52:03,571 INFO    MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb b/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..2f75b3e73a217b18b87ae8966776fa45be09c53d
Binary files /dev/null and b/wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb differ
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml b/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..07f41facb74690088d1d0b07a0c5bd2e9cb291f4
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_12-58-59_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/output.log b/wandb/run-20250212_125924-xhsgsxqq/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..0e757b553e4758f4d7cd9dbeb4450a0b166b1880
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 632, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt b/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..6df8aff999de9e30b89c5c5cc1a7450cf021e67a
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T12:59:24.816046Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777115136"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..b7598f14ab3c8cbb4c8d36057ac70a206f4ddfed
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T12:59:24.63359638Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa90v2n0h/port-226591.txt","pid":226591,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T12:59:24.673351851Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226591}
+{"time":"2025-02-12T12:59:24.673324591Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41203,"Zone":""}}
+{"time":"2025-02-12T12:59:24.809390061Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:24.819517706Z","level":"INFO","msg":"handleInformInit: received","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:24.923364896Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xhsgsxqq","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:25.341856618Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:25.341962867Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T12:59:25.341964847Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:25.342139496Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:25.569637185Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41203->127.0.0.1:53388: use of closed network connection","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:26.643739482Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:26.643783881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:53388"}
+{"time":"2025-02-12T12:59:26.643808411Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..26b5602b5006fa339037330ef9890aa5ee369829
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T12:59:24.819689255Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-core.log"}
+{"time":"2025-02-12T12:59:24.923303396Z","level":"INFO","msg":"created new stream","id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:24.923354596Z","level":"INFO","msg":"stream: started","id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:24.923472545Z","level":"INFO","msg":"writer: Do: started","stream_id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:24.923494475Z","level":"INFO","msg":"handler: started","stream_id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:24.923560215Z","level":"INFO","msg":"sender: started","stream_id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:25.192419842Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T12:59:25.341944447Z","level":"INFO","msg":"stream: closing","id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:25.341971537Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T12:59:25.342739283Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T12:59:26.408412135Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T12:59:26.643441283Z","level":"INFO","msg":"handler: closed","stream_id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:26.643483513Z","level":"INFO","msg":"writer: Close: closed","stream_id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:26.643525403Z","level":"INFO","msg":"sender: closed","stream_id":"xhsgsxqq"}
+{"time":"2025-02-12T12:59:26.643566923Z","level":"INFO","msg":"stream: closed","id":"xhsgsxqq"}
diff --git a/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log b/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..bc26d42fc8a709f9cfcfa6a7dbdbbc82be8f50e9
--- /dev/null
+++ b/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Configure stats pid to 226591
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug.log
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125924-xhsgsxqq/logs/debug-internal.log
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:init():756] calling init triggers
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 12:59:24,598 INFO    MainThread:226591 [wandb_init.py:init():789] starting backend
+2025-02-12 12:59:24,809 INFO    MainThread:226591 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 12:59:24,815 INFO    MainThread:226591 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 12:59:24,815 INFO    MainThread:226591 [wandb_init.py:init():808] backend started and connected
+2025-02-12 12:59:24,818 INFO    MainThread:226591 [wandb_init.py:init():901] updated telemetry
+2025-02-12 12:59:24,823 INFO    MainThread:226591 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 12:59:25,189 INFO    MainThread:226591 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 12:59:25,301 INFO    MainThread:226591 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 12:59:25,303 INFO    MainThread:226591 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 12:59:25,304 INFO    MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-58-59_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 12:59:25,306 INFO    MainThread:226591 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x76451d282f30>>
+2025-02-12 12:59:25,306 INFO    MainThread:226591 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 12:59:25,342 WARNING MsgRouterThr:226591 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb b/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..1b53de295e68266af60d83bca74b9be0894b34e8
Binary files /dev/null and b/wandb/run-20250212_125924-xhsgsxqq/run-xhsgsxqq.wandb differ
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/config.yaml b/wandb/run-20250212_130533-zeu6vay4/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..420595717bd915426e2e2cab93fdb5e08bd589cb
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_13-05-10_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/output.log b/wandb/run-20250212_130533-zeu6vay4/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..769cfff2bf9a545350da2d3c1f0974308181aefb
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 633, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 582, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt b/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json b/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..aebfc577d2e88050d8085f734bc8f66f785f3040
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T13:05:34.019960Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777221632"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json b/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..87fbf64e8666318f6e5b43dae2d01a140fc24ead
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T13:05:33.837553705Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvxrga7tq/port-227738.txt","pid":227738,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T13:05:33.841824082Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":227738}
+{"time":"2025-02-12T13:05:33.841805772Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41271,"Zone":""}}
+{"time":"2025-02-12T13:05:34.013327792Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:34.022772969Z","level":"INFO","msg":"handleInformInit: received","streamId":"zeu6vay4","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:34.128884037Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"zeu6vay4","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:34.567948471Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:34.568036341Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:34.568049191Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T13:05:34.568283769Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:34.807984453Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41271->127.0.0.1:36972: use of closed network connection","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:35.861897508Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:35.861922138Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:36972"}
+{"time":"2025-02-12T13:05:35.861946917Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..d3fe88aa2e519427abbec9fea34b1053792e9916
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T13:05:34.023147337Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug-core.log"}
+{"time":"2025-02-12T13:05:34.128801887Z","level":"INFO","msg":"created new stream","id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:34.128873947Z","level":"INFO","msg":"stream: started","id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:34.128926027Z","level":"INFO","msg":"writer: Do: started","stream_id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:34.128988376Z","level":"INFO","msg":"sender: started","stream_id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:34.129040036Z","level":"INFO","msg":"handler: started","stream_id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:34.419229803Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T13:05:34.568056211Z","level":"INFO","msg":"stream: closing","id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:34.56812972Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T13:05:34.568907056Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T13:05:35.64406902Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T13:05:35.861616759Z","level":"INFO","msg":"handler: closed","stream_id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:35.861676279Z","level":"INFO","msg":"writer: Close: closed","stream_id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:35.861688929Z","level":"INFO","msg":"sender: closed","stream_id":"zeu6vay4"}
+{"time":"2025-02-12T13:05:35.861776488Z","level":"INFO","msg":"stream: closed","id":"zeu6vay4"}
diff --git a/wandb/run-20250212_130533-zeu6vay4/logs/debug.log b/wandb/run-20250212_130533-zeu6vay4/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..9b2ee9877baf73f9ef90a46ae8c6af7f3f198927
--- /dev/null
+++ b/wandb/run-20250212_130533-zeu6vay4/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Configure stats pid to 227738
+2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 13:05:33,802 INFO    MainThread:227738 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug.log
+2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_130533-zeu6vay4/logs/debug-internal.log
+2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:init():756] calling init triggers
+2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 13:05:33,803 INFO    MainThread:227738 [wandb_init.py:init():789] starting backend
+2025-02-12 13:05:34,013 INFO    MainThread:227738 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 13:05:34,019 INFO    MainThread:227738 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 13:05:34,019 INFO    MainThread:227738 [wandb_init.py:init():808] backend started and connected
+2025-02-12 13:05:34,021 INFO    MainThread:227738 [wandb_init.py:init():901] updated telemetry
+2025-02-12 13:05:34,029 INFO    MainThread:227738 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 13:05:34,416 INFO    MainThread:227738 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 13:05:34,527 INFO    MainThread:227738 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 13:05:34,529 INFO    MainThread:227738 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 13:05:34,530 INFO    MainThread:227738 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-05-10_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 13:05:34,533 INFO    MainThread:227738 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x79ded964ef90>>
+2025-02-12 13:05:34,533 INFO    MainThread:227738 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 13:05:34,568 WARNING MsgRouterThr:227738 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb b/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..7901333681792ad6d6f54878666ba62ed5e18337
Binary files /dev/null and b/wandb/run-20250212_130533-zeu6vay4/run-zeu6vay4.wandb differ
diff --git a/wandb/run-20250212_131820-cnos968u/files/config.yaml b/wandb/run-20250212_131820-cnos968u/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95906302a5990edf8b7625c376bbe1acd0b580a0
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_13-17-51_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_131820-cnos968u/files/output.log b/wandb/run-20250212_131820-cnos968u/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..47ae9b884ed0bd7b0b1e663b294089b5065b6378
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_131820-cnos968u/files/requirements.txt b/wandb/run-20250212_131820-cnos968u/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json b/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..90498074d4382bf401e4eae2589b05d71bb571fe
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T13:18:20.315500Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777364992"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json b/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug-core.log b/wandb/run-20250212_131820-cnos968u/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..c6146a7673a983023bddad8f6b42c8bee05d89af
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T13:18:20.13270486Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoihqtf7_/port-228562.txt","pid":228562,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T13:18:20.138326066Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":228562}
+{"time":"2025-02-12T13:18:20.138290606Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43223,"Zone":""}}
+{"time":"2025-02-12T13:18:20.311508128Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:20.318120198Z","level":"INFO","msg":"handleInformInit: received","streamId":"cnos968u","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:20.423169259Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"cnos968u","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:20.829667746Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:20.829731656Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:20.829793855Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T13:18:20.829860655Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:20.996850774Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:43223->127.0.0.1:59824: use of closed network connection","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:22.315588426Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:22.315614045Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:59824"}
+{"time":"2025-02-12T13:18:22.315638895Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log b/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..8906a5937e9d4dc75937d8ef3efaf8e6c19bc30f
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T13:18:20.318385167Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug-core.log"}
+{"time":"2025-02-12T13:18:20.423108789Z","level":"INFO","msg":"created new stream","id":"cnos968u"}
+{"time":"2025-02-12T13:18:20.423149939Z","level":"INFO","msg":"stream: started","id":"cnos968u"}
+{"time":"2025-02-12T13:18:20.423187669Z","level":"INFO","msg":"writer: Do: started","stream_id":"cnos968u"}
+{"time":"2025-02-12T13:18:20.423274918Z","level":"INFO","msg":"sender: started","stream_id":"cnos968u"}
+{"time":"2025-02-12T13:18:20.423278528Z","level":"INFO","msg":"handler: started","stream_id":"cnos968u"}
+{"time":"2025-02-12T13:18:20.691599029Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T13:18:20.829801165Z","level":"INFO","msg":"stream: closing","id":"cnos968u"}
+{"time":"2025-02-12T13:18:20.829838505Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T13:18:20.830448261Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T13:18:22.102558325Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T13:18:22.315316527Z","level":"INFO","msg":"handler: closed","stream_id":"cnos968u"}
+{"time":"2025-02-12T13:18:22.315380557Z","level":"INFO","msg":"writer: Close: closed","stream_id":"cnos968u"}
+{"time":"2025-02-12T13:18:22.315415207Z","level":"INFO","msg":"sender: closed","stream_id":"cnos968u"}
+{"time":"2025-02-12T13:18:22.315473316Z","level":"INFO","msg":"stream: closed","id":"cnos968u"}
diff --git a/wandb/run-20250212_131820-cnos968u/logs/debug.log b/wandb/run-20250212_131820-cnos968u/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..39be2fccb1e90ae66cd4e4477210e91974f70b36
--- /dev/null
+++ b/wandb/run-20250212_131820-cnos968u/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Configure stats pid to 228562
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug.log
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_131820-cnos968u/logs/debug-internal.log
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:init():756] calling init triggers
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 13:18:20,106 INFO    MainThread:228562 [wandb_init.py:init():789] starting backend
+2025-02-12 13:18:20,311 INFO    MainThread:228562 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 13:18:20,315 INFO    MainThread:228562 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 13:18:20,315 INFO    MainThread:228562 [wandb_init.py:init():808] backend started and connected
+2025-02-12 13:18:20,316 INFO    MainThread:228562 [wandb_init.py:init():901] updated telemetry
+2025-02-12 13:18:20,320 INFO    MainThread:228562 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 13:18:20,688 INFO    MainThread:228562 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 13:18:20,789 INFO    MainThread:228562 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 13:18:20,791 INFO    MainThread:228562 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 13:18:20,792 INFO    MainThread:228562 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-17-51_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 13:18:20,794 INFO    MainThread:228562 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7c8ab3472b40>>
+2025-02-12 13:18:20,794 INFO    MainThread:228562 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 13:18:20,830 WARNING MsgRouterThr:228562 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb b/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..8fd59480179eef903cc9efb265ca12fb88f77992
Binary files /dev/null and b/wandb/run-20250212_131820-cnos968u/run-cnos968u.wandb differ
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml b/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf75123251a142c3bb6a48006c0bdfb4679249b3
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_13-49-16_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/output.log b/wandb/run-20250212_134942-5ywh9vkd/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..b9e4b6c313316dc48b344f88dc5473c4de1f1088
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 631, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 580, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt b/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3da9f22f6e027ebd1af1c6145cfa29e6643c9e3d
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T13:49:42.549340Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777541120"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..14ea2e58769ad59628cf9dcf7f7d3c3cd69e16ea
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T13:49:42.368539349Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcs75h_7n/port-230104.txt","pid":230104,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T13:49:42.376031144Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230104}
+{"time":"2025-02-12T13:49:42.375994744Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46317,"Zone":""}}
+{"time":"2025-02-12T13:49:42.545856407Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:42.550347793Z","level":"INFO","msg":"handleInformInit: received","streamId":"5ywh9vkd","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:42.653585761Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"5ywh9vkd","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:43.065873804Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:43.065944244Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:43.065999603Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T13:49:43.066118552Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:43.307941987Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:46317->127.0.0.1:58840: use of closed network connection","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:44.336718599Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:44.336762259Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:58840"}
+{"time":"2025-02-12T13:49:44.336780169Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..958ba48035f615c72447b59df82dad6ed3e33a35
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T13:49:42.550471882Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug-core.log"}
+{"time":"2025-02-12T13:49:42.653534801Z","level":"INFO","msg":"created new stream","id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:42.653576741Z","level":"INFO","msg":"stream: started","id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:42.653711879Z","level":"INFO","msg":"handler: started","stream_id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:42.653689329Z","level":"INFO","msg":"writer: Do: started","stream_id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:42.653750879Z","level":"INFO","msg":"sender: started","stream_id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:42.915738751Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T13:49:43.065978643Z","level":"INFO","msg":"stream: closing","id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:43.066102462Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T13:49:43.066991893Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T13:49:44.121293278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T13:49:44.336369943Z","level":"INFO","msg":"handler: closed","stream_id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:44.336433982Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:44.336490092Z","level":"INFO","msg":"sender: closed","stream_id":"5ywh9vkd"}
+{"time":"2025-02-12T13:49:44.336537621Z","level":"INFO","msg":"stream: closed","id":"5ywh9vkd"}
diff --git a/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log b/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..daf9c305c6a9112d08089604436fe99d07693e41
--- /dev/null
+++ b/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Configure stats pid to 230104
+2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 13:49:42,336 INFO    MainThread:230104 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug.log
+2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_134942-5ywh9vkd/logs/debug-internal.log
+2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:init():756] calling init triggers
+2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 13:49:42,337 INFO    MainThread:230104 [wandb_init.py:init():789] starting backend
+2025-02-12 13:49:42,545 INFO    MainThread:230104 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 13:49:42,548 INFO    MainThread:230104 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 13:49:42,549 INFO    MainThread:230104 [wandb_init.py:init():808] backend started and connected
+2025-02-12 13:49:42,550 INFO    MainThread:230104 [wandb_init.py:init():901] updated telemetry
+2025-02-12 13:49:42,553 INFO    MainThread:230104 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 13:49:42,912 INFO    MainThread:230104 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 13:49:43,026 INFO    MainThread:230104 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 13:49:43,027 INFO    MainThread:230104 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 13:49:43,028 INFO    MainThread:230104 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-49-16_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 13:49:43,031 INFO    MainThread:230104 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x701149d824e0>>
+2025-02-12 13:49:43,031 INFO    MainThread:230104 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 13:49:43,066 WARNING MsgRouterThr:230104 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb b/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..d95543eeebd7c7ded69dd770ef822456843f59fd
Binary files /dev/null and b/wandb/run-20250212_134942-5ywh9vkd/run-5ywh9vkd.wandb differ
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml b/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f1ca62c76dba92301a01886cb09958ac5a1f9f8
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_13-51-27_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/output.log b/wandb/run-20250212_135151-5m7b3lhr/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..da5340cfe9cc02a517c9de0cdb66c052b387beab
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/files/output.log
@@ -0,0 +1,23 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+Inside on_epoch_begin - train_dataloader: None
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 631, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 580, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 556, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt b/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3e914497e6936a06c61ef66438821f8fa5b99f36
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T13:51:51.496687Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777639424"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..16c1fd0e3b169fe267030e00e4d3679cb9c42a52
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T13:51:51.3124141Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcyi8bfs3/port-230520.txt","pid":230520,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T13:51:51.317857259Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230520}
+{"time":"2025-02-12T13:51:51.317819419Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41833,"Zone":""}}
+{"time":"2025-02-12T13:51:51.490296524Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:51.49928487Z","level":"INFO","msg":"handleInformInit: received","streamId":"5m7b3lhr","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:51.60503634Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"5m7b3lhr","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:52.040899031Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:52.0409399Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:52.04094382Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T13:51:52.04102198Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:52.213258576Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41833->127.0.0.1:51804: use of closed network connection","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:53.364540267Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:53.364555197Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51804"}
+{"time":"2025-02-12T13:51:53.364566507Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..b9aa4b44922e1558b02d06d29a822d654f48137f
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T13:51:51.499619967Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug-core.log"}
+{"time":"2025-02-12T13:51:51.604967821Z","level":"INFO","msg":"created new stream","id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:51.60502639Z","level":"INFO","msg":"stream: started","id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:51.605130129Z","level":"INFO","msg":"writer: Do: started","stream_id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:51.605216728Z","level":"INFO","msg":"handler: started","stream_id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:51.605315277Z","level":"INFO","msg":"sender: started","stream_id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:51.888376389Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T13:51:52.0410007Z","level":"INFO","msg":"stream: closing","id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:52.041038759Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T13:51:52.041736053Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T13:51:53.021189887Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T13:51:53.364332489Z","level":"INFO","msg":"handler: closed","stream_id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:53.364389748Z","level":"INFO","msg":"sender: closed","stream_id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:53.364389018Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5m7b3lhr"}
+{"time":"2025-02-12T13:51:53.364475307Z","level":"INFO","msg":"stream: closed","id":"5m7b3lhr"}
diff --git a/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log b/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..a7e7ed2dbfef267db590321ad3622db7e9cf3621
--- /dev/null
+++ b/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Configure stats pid to 230520
+2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 13:51:51,279 INFO    MainThread:230520 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug.log
+2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135151-5m7b3lhr/logs/debug-internal.log
+2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:init():756] calling init triggers
+2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 13:51:51,280 INFO    MainThread:230520 [wandb_init.py:init():789] starting backend
+2025-02-12 13:51:51,490 INFO    MainThread:230520 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 13:51:51,496 INFO    MainThread:230520 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 13:51:51,496 INFO    MainThread:230520 [wandb_init.py:init():808] backend started and connected
+2025-02-12 13:51:51,498 INFO    MainThread:230520 [wandb_init.py:init():901] updated telemetry
+2025-02-12 13:51:51,505 INFO    MainThread:230520 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 13:51:51,885 INFO    MainThread:230520 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 13:51:52,001 INFO    MainThread:230520 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 13:51:52,003 INFO    MainThread:230520 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 13:51:52,004 INFO    MainThread:230520 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-51-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 13:51:52,006 INFO    MainThread:230520 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x74295c46d5e0>>
+2025-02-12 13:51:52,006 INFO    MainThread:230520 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 13:51:52,041 WARNING MsgRouterThr:230520 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb b/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..90536287bc7c440e01774fadf1beb280c4a85b3e
Binary files /dev/null and b/wandb/run-20250212_135151-5m7b3lhr/run-5m7b3lhr.wandb differ
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/config.yaml b/wandb/run-20250212_135331-x29lgb1q/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..140e3770741b2dbcca969ef317059023c5bc500e
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_13-53-04_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/output.log b/wandb/run-20250212_135331-x29lgb1q/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..93558c333a13dd8d66af57c288bf367d12bd83cc
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/files/output.log
@@ -0,0 +1,2 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]
+Inside on_epoch_begin - train_dataloader: None
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/requirements.txt b/wandb/run-20250212_135331-x29lgb1q/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/wandb-metadata.json b/wandb/run-20250212_135331-x29lgb1q/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..a90e752ec5892415734ab62b5958cbe20968d2e8
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T13:53:31.254517Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777729536"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_135331-x29lgb1q/files/wandb-summary.json b/wandb/run-20250212_135331-x29lgb1q/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..40c619c4d224bffb127309ef90b7284f8468e0d0
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T13:53:31.070943678Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpnzj0vsxp/port-230894.txt","pid":230894,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T13:53:31.105857563Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":230894}
+{"time":"2025-02-12T13:53:31.105978192Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39599,"Zone":""}}
+{"time":"2025-02-12T13:53:31.2489183Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:31.257512053Z","level":"INFO","msg":"handleInformInit: received","streamId":"x29lgb1q","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:31.363529712Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"x29lgb1q","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:31.886789826Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:31.886893335Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:31.886966894Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T13:53:31.887054423Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:32.110768467Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:39599->127.0.0.1:43984: use of closed network connection","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:33.114823214Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:33.114850744Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:43984"}
+{"time":"2025-02-12T13:53:33.114865414Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..dd1c8e4964b1bf449f85380ffb5d3a89ca81e194
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T13:53:31.257882961Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug-core.log"}
+{"time":"2025-02-12T13:53:31.363436433Z","level":"INFO","msg":"created new stream","id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:31.363519102Z","level":"INFO","msg":"stream: started","id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:31.363646481Z","level":"INFO","msg":"writer: Do: started","stream_id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:31.36374892Z","level":"INFO","msg":"sender: started","stream_id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:31.363851899Z","level":"INFO","msg":"handler: started","stream_id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:31.741270453Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T13:53:31.886903155Z","level":"INFO","msg":"stream: closing","id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:31.886939864Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T13:53:31.887754548Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T13:53:32.909722722Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T13:53:33.114562596Z","level":"INFO","msg":"handler: closed","stream_id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:33.114621616Z","level":"INFO","msg":"writer: Close: closed","stream_id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:33.114647856Z","level":"INFO","msg":"sender: closed","stream_id":"x29lgb1q"}
+{"time":"2025-02-12T13:53:33.114708545Z","level":"INFO","msg":"stream: closed","id":"x29lgb1q"}
diff --git a/wandb/run-20250212_135331-x29lgb1q/logs/debug.log b/wandb/run-20250212_135331-x29lgb1q/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..305941888c14c138a20e3bb8a3a09cb3ea14fc13
--- /dev/null
+++ b/wandb/run-20250212_135331-x29lgb1q/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Configure stats pid to 230894
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug.log
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135331-x29lgb1q/logs/debug-internal.log
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:init():756] calling init triggers
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 13:53:31,043 INFO    MainThread:230894 [wandb_init.py:init():789] starting backend
+2025-02-12 13:53:31,248 INFO    MainThread:230894 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 13:53:31,254 INFO    MainThread:230894 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 13:53:31,254 INFO    MainThread:230894 [wandb_init.py:init():808] backend started and connected
+2025-02-12 13:53:31,255 INFO    MainThread:230894 [wandb_init.py:init():901] updated telemetry
+2025-02-12 13:53:31,260 INFO    MainThread:230894 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 13:53:31,738 INFO    MainThread:230894 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 13:53:31,846 INFO    MainThread:230894 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 13:53:31,847 INFO    MainThread:230894 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 13:53:31,847 INFO    MainThread:230894 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 13:53:31,847 INFO    MainThread:230894 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 13:53:31,848 INFO    MainThread:230894 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 13:53:31,849 INFO    MainThread:230894 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-53-04_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 13:53:31,852 INFO    MainThread:230894 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x72c09ca6eed0>>
+2025-02-12 13:53:31,852 INFO    MainThread:230894 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 13:53:31,887 WARNING MsgRouterThr:230894 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb b/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..4dde8c9fffacb3c18e046c0f82aeb24874408d5a
Binary files /dev/null and b/wandb/run-20250212_135331-x29lgb1q/run-x29lgb1q.wandb differ
diff --git a/wandb/run-20250212_135435-53evlis5/files/config.yaml b/wandb/run-20250212_135435-53evlis5/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..30f65cde960f35c69b0bdf95f4b9d9d2b3b1184f
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_13-54-12_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_135435-53evlis5/files/output.log b/wandb/run-20250212_135435-53evlis5/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..93558c333a13dd8d66af57c288bf367d12bd83cc
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/files/output.log
@@ -0,0 +1,2 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]
+Inside on_epoch_begin - train_dataloader: None
diff --git a/wandb/run-20250212_135435-53evlis5/files/requirements.txt b/wandb/run-20250212_135435-53evlis5/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135435-53evlis5/files/wandb-metadata.json b/wandb/run-20250212_135435-53evlis5/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..c9b338ad3442a65b2ed7fea55d58accc80964997
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T13:54:36.201204Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777815552"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_135435-53evlis5/files/wandb-summary.json b/wandb/run-20250212_135435-53evlis5/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug-core.log b/wandb/run-20250212_135435-53evlis5/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..63b9ba146e9102da80c2f2f7f97d0353b6c79877
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T13:54:36.018759837Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpojka6x19/port-231248.txt","pid":231248,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T13:54:36.02418464Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":231248}
+{"time":"2025-02-12T13:54:36.02415448Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46325,"Zone":""}}
+{"time":"2025-02-12T13:54:36.194658966Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:36.203723827Z","level":"INFO","msg":"handleInformInit: received","streamId":"53evlis5","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:36.309920346Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"53evlis5","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:36.758732366Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:36.758792086Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:36.758889215Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T13:54:36.758938304Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:36.979693541Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:46325->127.0.0.1:35906: use of closed network connection","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:38.005429072Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:38.005459881Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35906"}
+{"time":"2025-02-12T13:54:38.005482981Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log b/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..12101c30e8dcabd6c545c213d9abcb5c366354b2
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T13:54:36.204103004Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug-core.log"}
+{"time":"2025-02-12T13:54:36.309862037Z","level":"INFO","msg":"created new stream","id":"53evlis5"}
+{"time":"2025-02-12T13:54:36.309910267Z","level":"INFO","msg":"stream: started","id":"53evlis5"}
+{"time":"2025-02-12T13:54:36.310009206Z","level":"INFO","msg":"writer: Do: started","stream_id":"53evlis5"}
+{"time":"2025-02-12T13:54:36.310099665Z","level":"INFO","msg":"sender: started","stream_id":"53evlis5"}
+{"time":"2025-02-12T13:54:36.310319183Z","level":"INFO","msg":"handler: started","stream_id":"53evlis5"}
+{"time":"2025-02-12T13:54:36.609964328Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T13:54:36.758890355Z","level":"INFO","msg":"stream: closing","id":"53evlis5"}
+{"time":"2025-02-12T13:54:36.758937814Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T13:54:36.759635628Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T13:54:37.773850697Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T13:54:38.005111685Z","level":"INFO","msg":"handler: closed","stream_id":"53evlis5"}
+{"time":"2025-02-12T13:54:38.005175764Z","level":"INFO","msg":"sender: closed","stream_id":"53evlis5"}
+{"time":"2025-02-12T13:54:38.005166594Z","level":"INFO","msg":"writer: Close: closed","stream_id":"53evlis5"}
+{"time":"2025-02-12T13:54:38.005321073Z","level":"INFO","msg":"stream: closed","id":"53evlis5"}
diff --git a/wandb/run-20250212_135435-53evlis5/logs/debug.log b/wandb/run-20250212_135435-53evlis5/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..5f43982224cb3297178d00e7d017b3a59158840e
--- /dev/null
+++ b/wandb/run-20250212_135435-53evlis5/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Configure stats pid to 231248
+2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 13:54:35,983 INFO    MainThread:231248 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug.log
+2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135435-53evlis5/logs/debug-internal.log
+2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:init():756] calling init triggers
+2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 13:54:35,984 INFO    MainThread:231248 [wandb_init.py:init():789] starting backend
+2025-02-12 13:54:36,194 INFO    MainThread:231248 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 13:54:36,200 INFO    MainThread:231248 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 13:54:36,200 INFO    MainThread:231248 [wandb_init.py:init():808] backend started and connected
+2025-02-12 13:54:36,203 INFO    MainThread:231248 [wandb_init.py:init():901] updated telemetry
+2025-02-12 13:54:36,210 INFO    MainThread:231248 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 13:54:36,606 INFO    MainThread:231248 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 13:54:36,718 INFO    MainThread:231248 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 13:54:36,720 INFO    MainThread:231248 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 13:54:36,721 INFO    MainThread:231248 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-54-12_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 13:54:36,723 INFO    MainThread:231248 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7e080a0953a0>>
+2025-02-12 13:54:36,723 INFO    MainThread:231248 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 13:54:36,758 WARNING MsgRouterThr:231248 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb b/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..7ec7ba20b62cc55f53b817904fd528e0aeac4066
Binary files /dev/null and b/wandb/run-20250212_135435-53evlis5/run-53evlis5.wandb differ
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml b/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c51c497794483c0b0380f14431b8680dc5f7ba9b
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_13-56-06_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/output.log b/wandb/run-20250212_135631-dnrqwgd0/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..0c7c41f13b26424a300a12e79a0240a7acd5a6a5
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/files/output.log
@@ -0,0 +1,23 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+Inside on_epoch_begin - train_dataloader: None
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 632, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 581, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 557, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt b/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..65ae49a1584b5c0324cbf98ee13156b2a98ec35e
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T13:56:31.423900Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313777905664"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..b147dcc35bf777332920dc3f3ca4421b26256461
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T13:56:31.233734243Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmp_1p0wl/port-231645.txt","pid":231645,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T13:56:31.237208984Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":231645}
+{"time":"2025-02-12T13:56:31.237177324Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38409,"Zone":""}}
+{"time":"2025-02-12T13:56:31.418128107Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:31.426328298Z","level":"INFO","msg":"handleInformInit: received","streamId":"dnrqwgd0","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:31.532291862Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"dnrqwgd0","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:31.979710518Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:31.979771727Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:31.979834686Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T13:56:31.979890276Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:32.195706105Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:38409->127.0.0.1:60084: use of closed network connection","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:33.231929311Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:33.231969701Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:60084"}
+{"time":"2025-02-12T13:56:33.231990701Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..dd5c1ea629fb01670d3aa33b82105add3e58fdbf
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T13:56:31.426643885Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug-core.log"}
+{"time":"2025-02-12T13:56:31.532231333Z","level":"INFO","msg":"created new stream","id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:31.532282222Z","level":"INFO","msg":"stream: started","id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:31.532401851Z","level":"INFO","msg":"writer: Do: started","stream_id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:31.532436711Z","level":"INFO","msg":"sender: started","stream_id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:31.53251893Z","level":"INFO","msg":"handler: started","stream_id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:31.831057361Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T13:56:31.979836506Z","level":"INFO","msg":"stream: closing","id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:31.979949235Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T13:56:31.981148335Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T13:56:33.006463404Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T13:56:33.231629384Z","level":"INFO","msg":"handler: closed","stream_id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:33.231703643Z","level":"INFO","msg":"writer: Close: closed","stream_id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:33.231748163Z","level":"INFO","msg":"sender: closed","stream_id":"dnrqwgd0"}
+{"time":"2025-02-12T13:56:33.231782443Z","level":"INFO","msg":"stream: closed","id":"dnrqwgd0"}
diff --git a/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log b/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..e628857ea58f73e686450dd9fd9e0941437c4aac
--- /dev/null
+++ b/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Configure stats pid to 231645
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug.log
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_135631-dnrqwgd0/logs/debug-internal.log
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:init():756] calling init triggers
+2025-02-12 13:56:31,208 INFO    MainThread:231645 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 13:56:31,209 INFO    MainThread:231645 [wandb_init.py:init():789] starting backend
+2025-02-12 13:56:31,417 INFO    MainThread:231645 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 13:56:31,423 INFO    MainThread:231645 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 13:56:31,423 INFO    MainThread:231645 [wandb_init.py:init():808] backend started and connected
+2025-02-12 13:56:31,425 INFO    MainThread:231645 [wandb_init.py:init():901] updated telemetry
+2025-02-12 13:56:31,430 INFO    MainThread:231645 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 13:56:31,828 INFO    MainThread:231645 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 13:56:31,939 INFO    MainThread:231645 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 13:56:31,941 INFO    MainThread:231645 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 13:56:31,942 INFO    MainThread:231645 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_13-56-06_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 13:56:31,944 INFO    MainThread:231645 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7bfc4be86ff0>>
+2025-02-12 13:56:31,945 INFO    MainThread:231645 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 13:56:31,980 WARNING MsgRouterThr:231645 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb b/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..f4fa1b56219f3fbe8b9ded10000ea6f8e8711b5b
Binary files /dev/null and b/wandb/run-20250212_135631-dnrqwgd0/run-dnrqwgd0.wandb differ
diff --git a/wandb/run-20250212_140456-85d9ssit/files/config.yaml b/wandb/run-20250212_140456-85d9ssit/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20459048deba1181e28d01917b6ee58a0b19c371
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_14-04-28_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_140456-85d9ssit/files/output.log b/wandb/run-20250212_140456-85d9ssit/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..47ae9b884ed0bd7b0b1e663b294089b5065b6378
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/files/output.log
@@ -0,0 +1,22 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
+    self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
+    return self.call_event("on_epoch_begin", args, state, control)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
+    result = getattr(callback, event)(
+             ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
+    if isinstance(train_dataloader.dataset, IterableDatasetShard):
+                  ^^^^^^^^^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'dataset'
diff --git a/wandb/run-20250212_140456-85d9ssit/files/requirements.txt b/wandb/run-20250212_140456-85d9ssit/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json b/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..609a64191cc1e6ded5bd0a4031c0e83b27c08926
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/files/wandb-metadata.json
@@ -0,0 +1,87 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T14:04:56.751445Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--audio_column_name=audio",
+    "--text_column_name=sentence",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "313778016256"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json b/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c37fe1cbbb8aed86fd461a79642cb991e4d35cf
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":0}}
\ No newline at end of file
diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log b/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..f601fa971eb613b8b904449c275d1827295ba786
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-02-12T14:04:56.567564578Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqs28ml67/port-232359.txt","pid":232359,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T14:04:56.573119086Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":232359}
+{"time":"2025-02-12T14:04:56.573060477Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41429,"Zone":""}}
+{"time":"2025-02-12T14:04:56.745144471Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:56.753547786Z","level":"INFO","msg":"handleInformInit: received","streamId":"85d9ssit","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:56.859061499Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"85d9ssit","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:57.327873486Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:57.327950506Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:57.327989686Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T14:04:57.328056845Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:57.543980132Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:41429->127.0.0.1:35380: use of closed network connection","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:58.65202789Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:58.65205631Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35380"}
+{"time":"2025-02-12T14:04:58.65210661Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log b/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..34dfbdb326b60005e9dada05ae305ad436dfd321
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T14:04:56.753826604Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug-core.log"}
+{"time":"2025-02-12T14:04:56.859010159Z","level":"INFO","msg":"created new stream","id":"85d9ssit"}
+{"time":"2025-02-12T14:04:56.859052399Z","level":"INFO","msg":"stream: started","id":"85d9ssit"}
+{"time":"2025-02-12T14:04:56.859127668Z","level":"INFO","msg":"writer: Do: started","stream_id":"85d9ssit"}
+{"time":"2025-02-12T14:04:56.859226827Z","level":"INFO","msg":"sender: started","stream_id":"85d9ssit"}
+{"time":"2025-02-12T14:04:56.859302168Z","level":"INFO","msg":"handler: started","stream_id":"85d9ssit"}
+{"time":"2025-02-12T14:04:57.172644512Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T14:04:57.327994966Z","level":"INFO","msg":"stream: closing","id":"85d9ssit"}
+{"time":"2025-02-12T14:04:57.328025695Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T14:04:57.328790509Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T14:04:58.425349995Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T14:04:58.651670973Z","level":"INFO","msg":"handler: closed","stream_id":"85d9ssit"}
+{"time":"2025-02-12T14:04:58.651730382Z","level":"INFO","msg":"sender: closed","stream_id":"85d9ssit"}
+{"time":"2025-02-12T14:04:58.651712323Z","level":"INFO","msg":"writer: Close: closed","stream_id":"85d9ssit"}
+{"time":"2025-02-12T14:04:58.651855241Z","level":"INFO","msg":"stream: closed","id":"85d9ssit"}
diff --git a/wandb/run-20250212_140456-85d9ssit/logs/debug.log b/wandb/run-20250212_140456-85d9ssit/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..1164b0f163d2f03c5598f4e9dbd5df03ddd05535
--- /dev/null
+++ b/wandb/run-20250212_140456-85d9ssit/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Configure stats pid to 232359
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug.log
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_140456-85d9ssit/logs/debug-internal.log
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:init():756] calling init triggers
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 14:04:56,535 INFO    MainThread:232359 [wandb_init.py:init():789] starting backend
+2025-02-12 14:04:56,745 INFO    MainThread:232359 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 14:04:56,750 INFO    MainThread:232359 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 14:04:56,751 INFO    MainThread:232359 [wandb_init.py:init():808] backend started and connected
+2025-02-12 14:04:56,753 INFO    MainThread:232359 [wandb_init.py:init():901] updated telemetry
+2025-02-12 14:04:56,760 INFO    MainThread:232359 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 14:04:57,169 INFO    MainThread:232359 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 14:04:57,287 INFO    MainThread:232359 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 14:04:57,289 INFO    MainThread:232359 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 14:04:57,290 INFO    MainThread:232359 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-04-28_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 14:04:57,292 INFO    MainThread:232359 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7fea6eea2e70>>
+2025-02-12 14:04:57,292 INFO    MainThread:232359 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 14:04:57,328 WARNING MsgRouterThr:232359 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb b/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..6aecf6528222a0b9ca923222cc04215f30e7b215
Binary files /dev/null and b/wandb/run-20250212_140456-85d9ssit/run-85d9ssit.wandb differ
diff --git a/wandb/run-20250212_144814-onbjaexn/files/config.yaml b/wandb/run-20250212_144814-onbjaexn/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7bf2050c408e1c177d2cf067168af9e543d1374
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_14-47-46_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_144814-onbjaexn/files/output.log b/wandb/run-20250212_144814-onbjaexn/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..3a117abe7df89f3665e8ab773843e3452ad54bec
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/files/output.log
@@ -0,0 +1,49 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 635, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 584, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
+    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
+                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
+    batch_samples += [next(epoch_iterator)]
+                      ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
+    next_batch, next_batch_info = self._fetch_batches(main_iterator)
+                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
+    batches.append(next(iterator))
+                   ^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
+    data = self._next_data()
+           ^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
+    data.append(next(self.dataset_iter))
+                ^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
+    for key, example in ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
+    for key, example in iterator:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
+    for key, example in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
+    for x in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
+    processed_inputs = self.function(*function_args, **self.fn_kwargs)
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 474, in prepare_dataset
+    inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
+                               ~~~~~~^^^^^^^^^
+KeyError: 'array'
diff --git a/wandb/run-20250212_144814-onbjaexn/files/requirements.txt b/wandb/run-20250212_144814-onbjaexn/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json b/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3617f33b6a9e0d7a88e02c955cc5ba8d7786fe2e
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T14:48:14.426245Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "314421264384"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json b/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d52051e315a7a21a9d9e5a40a517408bb086162
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log b/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..6129efa42871aad3a2eaf1f4ed957947fcadfc81
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log
@@ -0,0 +1,13 @@
+{"time":"2025-02-12T14:48:14.241936287Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpn0iuoxdb/port-235726.txt","pid":235726,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T14:48:14.269915432Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":235726}
+{"time":"2025-02-12T14:48:14.269970272Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44879,"Zone":""}}
+{"time":"2025-02-12T14:48:14.42013725Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:14.429435808Z","level":"INFO","msg":"handleInformInit: received","streamId":"onbjaexn","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:14.534605813Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"onbjaexn","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:17.23040957Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:17.230519299Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:17.230528979Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T14:48:17.230598258Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:18.137682794Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:18.137715544Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:38426"}
+{"time":"2025-02-12T14:48:18.137735074Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log b/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..20bf3cf198ac1918e999652096a9c93e415993c3
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T14:48:14.429796675Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug-core.log"}
+{"time":"2025-02-12T14:48:14.534527814Z","level":"INFO","msg":"created new stream","id":"onbjaexn"}
+{"time":"2025-02-12T14:48:14.534596853Z","level":"INFO","msg":"stream: started","id":"onbjaexn"}
+{"time":"2025-02-12T14:48:14.534694833Z","level":"INFO","msg":"writer: Do: started","stream_id":"onbjaexn"}
+{"time":"2025-02-12T14:48:14.534760432Z","level":"INFO","msg":"handler: started","stream_id":"onbjaexn"}
+{"time":"2025-02-12T14:48:14.534942391Z","level":"INFO","msg":"sender: started","stream_id":"onbjaexn"}
+{"time":"2025-02-12T14:48:14.842151491Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T14:48:17.230483349Z","level":"INFO","msg":"stream: closing","id":"onbjaexn"}
+{"time":"2025-02-12T14:48:17.230507079Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T14:48:17.231180494Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T14:48:17.906287174Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T14:48:18.137280277Z","level":"INFO","msg":"handler: closed","stream_id":"onbjaexn"}
+{"time":"2025-02-12T14:48:18.137369966Z","level":"INFO","msg":"writer: Close: closed","stream_id":"onbjaexn"}
+{"time":"2025-02-12T14:48:18.137418376Z","level":"INFO","msg":"sender: closed","stream_id":"onbjaexn"}
+{"time":"2025-02-12T14:48:18.137549935Z","level":"INFO","msg":"stream: closed","id":"onbjaexn"}
diff --git a/wandb/run-20250212_144814-onbjaexn/logs/debug.log b/wandb/run-20250212_144814-onbjaexn/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..ef4069012800c296bd0c14d5ed4d73d0c2467c96
--- /dev/null
+++ b/wandb/run-20250212_144814-onbjaexn/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Configure stats pid to 235726
+2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 14:48:14,212 INFO    MainThread:235726 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug.log
+2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_144814-onbjaexn/logs/debug-internal.log
+2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:init():756] calling init triggers
+2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 14:48:14,213 INFO    MainThread:235726 [wandb_init.py:init():789] starting backend
+2025-02-12 14:48:14,419 INFO    MainThread:235726 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 14:48:14,425 INFO    MainThread:235726 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 14:48:14,425 INFO    MainThread:235726 [wandb_init.py:init():808] backend started and connected
+2025-02-12 14:48:14,428 INFO    MainThread:235726 [wandb_init.py:init():901] updated telemetry
+2025-02-12 14:48:14,434 INFO    MainThread:235726 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 14:48:14,839 INFO    MainThread:235726 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 14:48:14,950 INFO    MainThread:235726 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 14:48:14,951 INFO    MainThread:235726 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 14:48:14,953 INFO    MainThread:235726 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-47-46_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 14:48:14,955 INFO    MainThread:235726 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x766a6ba7a9f0>>
+2025-02-12 14:48:14,955 INFO    MainThread:235726 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 14:48:17,230 WARNING MsgRouterThr:235726 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb b/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..3bcda516eefb5fc320ac07ed1aa34f5c893f6e18
Binary files /dev/null and b/wandb/run-20250212_144814-onbjaexn/run-onbjaexn.wandb differ
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/config.yaml b/wandb/run-20250212_145250-7h6sh6az/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1659e855217bf787e28b7c6fcdf6754048d4f34e
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_14-52-23_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/output.log b/wandb/run-20250212_145250-7h6sh6az/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..ed56b0930f3380f5d2cd8b2ba4660678179dbb05
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/files/output.log
@@ -0,0 +1,52 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 657, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 606, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
+    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
+                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
+    batch_samples += [next(epoch_iterator)]
+                      ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
+    next_batch, next_batch_info = self._fetch_batches(main_iterator)
+                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
+    batches.append(next(iterator))
+                   ^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
+    data = self._next_data()
+           ^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
+    data.append(next(self.dataset_iter))
+                ^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
+    for key, example in ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
+    for key, example in iterator:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
+    for key, example in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
+    for x in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
+    processed_inputs = self.function(*function_args, **self.fn_kwargs)
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 492, in prepare_dataset
+    inputs = feature_extractor(audio_array, sampling_rate=sampling_rate)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/models/whisper/feature_extraction_whisper.py", line 265, in __call__
+    raw_speech = np.asarray(raw_speech, dtype=np.float32)
+                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ValueError: could not convert string to float: 'common_voice_eu_39287311.wav'
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt b/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json b/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..e0270308edb884f1317d7a8c8ecd6d03846cb42d
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T14:52:51.028960Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "315195543552"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json b/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d52051e315a7a21a9d9e5a40a517408bb086162
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..bdc472ce22920952bc84d9ac978de3754c21a2ea
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log
@@ -0,0 +1,13 @@
+{"time":"2025-02-12T14:52:50.845987197Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp6ead6ms8/port-236505.txt","pid":236505,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T14:52:50.851144401Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":236505}
+{"time":"2025-02-12T14:52:50.851121011Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45803,"Zone":""}}
+{"time":"2025-02-12T14:52:51.022520498Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:51.031293581Z","level":"INFO","msg":"handleInformInit: received","streamId":"7h6sh6az","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:51.13681882Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"7h6sh6az","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:53.567639763Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:53.567714252Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:53.567766542Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T14:52:53.567883362Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:54.608402958Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:54.608420568Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:43038"}
+{"time":"2025-02-12T14:52:54.608430008Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..193e726955b8de9172c8a8da3854bacc0b3770ef
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T14:52:51.031699779Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug-core.log"}
+{"time":"2025-02-12T14:52:51.13677806Z","level":"INFO","msg":"created new stream","id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:51.13681223Z","level":"INFO","msg":"stream: started","id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:51.13682766Z","level":"INFO","msg":"writer: Do: started","stream_id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:51.136887979Z","level":"INFO","msg":"handler: started","stream_id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:51.136996279Z","level":"INFO","msg":"sender: started","stream_id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:51.40503864Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T14:52:53.567741802Z","level":"INFO","msg":"stream: closing","id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:53.567786892Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T14:52:53.56844074Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T14:52:54.38014778Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T14:52:54.608180109Z","level":"INFO","msg":"handler: closed","stream_id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:54.608239299Z","level":"INFO","msg":"writer: Close: closed","stream_id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:54.608252109Z","level":"INFO","msg":"sender: closed","stream_id":"7h6sh6az"}
+{"time":"2025-02-12T14:52:54.608324618Z","level":"INFO","msg":"stream: closed","id":"7h6sh6az"}
diff --git a/wandb/run-20250212_145250-7h6sh6az/logs/debug.log b/wandb/run-20250212_145250-7h6sh6az/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..9e0a5b7af23b12f958416746291b2016b27b0920
--- /dev/null
+++ b/wandb/run-20250212_145250-7h6sh6az/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Configure stats pid to 236505
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug.log
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145250-7h6sh6az/logs/debug-internal.log
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:init():756] calling init triggers
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 14:52:50,817 INFO    MainThread:236505 [wandb_init.py:init():789] starting backend
+2025-02-12 14:52:51,022 INFO    MainThread:236505 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 14:52:51,028 INFO    MainThread:236505 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 14:52:51,028 INFO    MainThread:236505 [wandb_init.py:init():808] backend started and connected
+2025-02-12 14:52:51,030 INFO    MainThread:236505 [wandb_init.py:init():901] updated telemetry
+2025-02-12 14:52:51,037 INFO    MainThread:236505 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 14:52:51,402 INFO    MainThread:236505 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 14:52:51,519 INFO    MainThread:236505 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 14:52:51,521 INFO    MainThread:236505 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 14:52:51,522 INFO    MainThread:236505 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-52-23_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 14:52:51,524 INFO    MainThread:236505 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7289d4692540>>
+2025-02-12 14:52:51,524 INFO    MainThread:236505 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 14:52:53,567 WARNING MsgRouterThr:236505 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb b/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..11838bad33d6e4a1a00df232467c3bd278b754a5
Binary files /dev/null and b/wandb/run-20250212_145250-7h6sh6az/run-7h6sh6az.wandb differ
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml b/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1aee5532694c9c3a16dde77aeb5d364ddac0c141
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_14-54-21_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/output.log b/wandb/run-20250212_145446-ncnr0yzu/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..f01eb94ffe96faade97a93f96968111bebc6ef65
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/files/output.log
@@ -0,0 +1,52 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 653, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 602, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
+    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
+                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
+    batch_samples += [next(epoch_iterator)]
+                      ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
+    next_batch, next_batch_info = self._fetch_batches(main_iterator)
+                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
+    batches.append(next(iterator))
+                   ^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
+    data = self._next_data()
+           ^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
+    data.append(next(self.dataset_iter))
+                ^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
+    for key, example in ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
+    for key, example in iterator:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
+    for key, example in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
+    for x in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
+    processed_inputs = self.function(*function_args, **self.fn_kwargs)
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 484, in prepare_dataset
+    inputs = feature_extractor(batch[audio_column_name], sampling_rate=feature_extractor.sampling_rate)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/models/whisper/feature_extraction_whisper.py", line 265, in __call__
+    raw_speech = np.asarray(raw_speech, dtype=np.float32)
+                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+TypeError: float() argument must be a string or a real number, not 'dict'
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt b/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f58184ad4e6e2757e4e20e41ecb4b819e7e4daf
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T14:54:46.573889Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "315195682816"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d52051e315a7a21a9d9e5a40a517408bb086162
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..db5aa803baede20483e71c9d07c375e3db38e6fd
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log
@@ -0,0 +1,13 @@
+{"time":"2025-02-12T14:54:46.391038145Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpn5cre9oi/port-236985.txt","pid":236985,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T14:54:46.395903327Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":236985}
+{"time":"2025-02-12T14:54:46.395861037Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38029,"Zone":""}}
+{"time":"2025-02-12T14:54:46.567245341Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:46.576515207Z","level":"INFO","msg":"handleInformInit: received","streamId":"ncnr0yzu","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:46.683228559Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ncnr0yzu","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:48.66373831Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:48.66385373Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T14:54:48.66383821Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:48.663953679Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:49.591574304Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:49.591599923Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:46988"}
+{"time":"2025-02-12T14:54:49.591615933Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..4e45c5ab16d921728ca58083736e7bfa341dd2e1
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T14:54:46.576935865Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug-core.log"}
+{"time":"2025-02-12T14:54:46.683143569Z","level":"INFO","msg":"created new stream","id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:46.683218799Z","level":"INFO","msg":"stream: started","id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:46.683354688Z","level":"INFO","msg":"writer: Do: started","stream_id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:46.683407538Z","level":"INFO","msg":"sender: started","stream_id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:46.683417878Z","level":"INFO","msg":"handler: started","stream_id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:46.986318334Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T14:54:48.66385817Z","level":"INFO","msg":"stream: closing","id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:48.66390572Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T14:54:48.664605317Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T14:54:49.357544434Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T14:54:49.591242244Z","level":"INFO","msg":"handler: closed","stream_id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:49.591302134Z","level":"INFO","msg":"writer: Close: closed","stream_id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:49.591331464Z","level":"INFO","msg":"sender: closed","stream_id":"ncnr0yzu"}
+{"time":"2025-02-12T14:54:49.591428814Z","level":"INFO","msg":"stream: closed","id":"ncnr0yzu"}
diff --git a/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log b/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..d1847e519db8f0646fddd46ec510ca163b76e2ca
--- /dev/null
+++ b/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Configure stats pid to 236985
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug.log
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145446-ncnr0yzu/logs/debug-internal.log
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:init():756] calling init triggers
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 14:54:46,356 INFO    MainThread:236985 [wandb_init.py:init():789] starting backend
+2025-02-12 14:54:46,567 INFO    MainThread:236985 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 14:54:46,573 INFO    MainThread:236985 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 14:54:46,573 INFO    MainThread:236985 [wandb_init.py:init():808] backend started and connected
+2025-02-12 14:54:46,575 INFO    MainThread:236985 [wandb_init.py:init():901] updated telemetry
+2025-02-12 14:54:46,582 INFO    MainThread:236985 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 14:54:46,982 INFO    MainThread:236985 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 14:54:47,097 INFO    MainThread:236985 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 14:54:47,099 INFO    MainThread:236985 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 14:54:47,100 INFO    MainThread:236985 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-54-21_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 14:54:47,102 INFO    MainThread:236985 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7844fa86aff0>>
+2025-02-12 14:54:47,102 INFO    MainThread:236985 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 14:54:48,664 WARNING MsgRouterThr:236985 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb b/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..65c0bd3fd32e7d2796b16127fa20b73e04cb090e
Binary files /dev/null and b/wandb/run-20250212_145446-ncnr0yzu/run-ncnr0yzu.wandb differ
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml b/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b8c28c55668d6406f48f51123b6234e497287be
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/files/config.yaml
@@ -0,0 +1,512 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_14-58-28_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/output.log b/wandb/run-20250212_145852-0gfsy6hh/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..e5c0ebecf44b6daff661d6bce1288f2c744ab14f
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/files/output.log
@@ -0,0 +1,49 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 639, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 588, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2464, in _inner_training_loop
+    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
+                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 5098, in get_batch_samples
+    batch_samples += [next(epoch_iterator)]
+                      ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 812, in __iter__
+    next_batch, next_batch_info = self._fetch_batches(main_iterator)
+                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 766, in _fetch_batches
+    batches.append(next(iterator))
+                   ^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
+    data = self._next_data()
+           ^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
+    data.append(next(self.dataset_iter))
+                ^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2289, in __iter__
+    for key, example in ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1351, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1407, in _iter
+    for key, example in iterator:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1884, in __iter__
+    for key, example in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1562, in __iter__
+    for x in self.ex_iterable:
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1059, in __iter__
+    yield from self._iter()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1159, in _iter
+    processed_inputs = self.function(*function_args, **self.fn_kwargs)
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 477, in prepare_dataset
+    audio_array = sample["array"]
+                  ~~~~~~^^^^^^^^^
+KeyError: 'array'
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt b/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..91a6670d98692edca3c7c2302ed41497a23b0b97
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-metadata.json
@@ -0,0 +1,85 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T14:58:52.625032Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "315206733824"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d52051e315a7a21a9d9e5a40a517408bb086162
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..5bc552cfe42bb92f4ee190b01d46a0bd5b93b82d
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log
@@ -0,0 +1,13 @@
+{"time":"2025-02-12T14:58:52.442304412Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpkux429nf/port-237900.txt","pid":237900,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T14:58:52.44698044Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":237900}
+{"time":"2025-02-12T14:58:52.446971151Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":42317,"Zone":""}}
+{"time":"2025-02-12T14:58:52.618246003Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:52.62556589Z","level":"INFO","msg":"handleInformInit: received","streamId":"0gfsy6hh","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:52.729897747Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"0gfsy6hh","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:54.959605329Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:54.959661068Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:54.959726878Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:54.959733858Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T14:58:55.879911345Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:55.879969174Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:39202"}
+{"time":"2025-02-12T14:58:55.879979154Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..c315ae03c741865692c58d12afec5388b478f0a7
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T14:58:52.62568889Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug-core.log"}
+{"time":"2025-02-12T14:58:52.729819547Z","level":"INFO","msg":"created new stream","id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:52.729885047Z","level":"INFO","msg":"stream: started","id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:52.729955257Z","level":"INFO","msg":"sender: started","stream_id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:52.729941107Z","level":"INFO","msg":"writer: Do: started","stream_id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:52.730185226Z","level":"INFO","msg":"handler: started","stream_id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:52.998836495Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T14:58:54.959699198Z","level":"INFO","msg":"stream: closing","id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:54.959735308Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T14:58:54.960467105Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T14:58:55.63479944Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T14:58:55.878933079Z","level":"INFO","msg":"handler: closed","stream_id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:55.878978098Z","level":"INFO","msg":"sender: closed","stream_id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:55.878984668Z","level":"INFO","msg":"writer: Close: closed","stream_id":"0gfsy6hh"}
+{"time":"2025-02-12T14:58:55.879264427Z","level":"INFO","msg":"stream: closed","id":"0gfsy6hh"}
diff --git a/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log b/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..b6929aba4219df8569a99e62bdfd8415b77c4a11
--- /dev/null
+++ b/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Configure stats pid to 237900
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug.log
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_145852-0gfsy6hh/logs/debug-internal.log
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:init():756] calling init triggers
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 14:58:52,407 INFO    MainThread:237900 [wandb_init.py:init():789] starting backend
+2025-02-12 14:58:52,618 INFO    MainThread:237900 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 14:58:52,624 INFO    MainThread:237900 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 14:58:52,624 INFO    MainThread:237900 [wandb_init.py:init():808] backend started and connected
+2025-02-12 14:58:52,627 INFO    MainThread:237900 [wandb_init.py:init():901] updated telemetry
+2025-02-12 14:58:52,634 INFO    MainThread:237900 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 14:58:52,995 INFO    MainThread:237900 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 14:58:53,107 INFO    MainThread:237900 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 14:58:53,108 INFO    MainThread:237900 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 14:58:53,110 INFO    MainThread:237900 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_14-58-28_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 14:58:53,112 INFO    MainThread:237900 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eaf4e0976e0>>
+2025-02-12 14:58:53,112 INFO    MainThread:237900 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 14:58:54,959 WARNING MsgRouterThr:237900 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb b/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..38232a1d6938c32ba68a2c2ebc20cb3a1500e7dc
Binary files /dev/null and b/wandb/run-20250212_145852-0gfsy6hh/run-0gfsy6hh.wandb differ
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/config.yaml b/wandb/run-20250212_152506-cp47eoxt/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ad19f35aa9b0b821482834434adf1e4e566842e
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/files/config.yaml
@@ -0,0 +1,536 @@
+_attn_implementation_autoset:
+    value: true
+_name_or_path:
+    value: openai/whisper-small
+_wandb:
+    value:
+        cli_version: 0.19.6
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": train/loss
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/grad_norm
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/learning_rate
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.19.6
+            "6": 4.49.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.19.6
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+average_tokens_across_devices:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 768
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 12
+decoder_ffn_dim:
+    value: 3072
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 12
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 12
+encoder_ffn_dim:
+    value: 3072
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 12
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 1000
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 225
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: null
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 1e-05
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Feb12_15-24-15_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 8000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 241734912
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 12
+num_mel_bins:
+    value: 80
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50257
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 16
+per_device_train_batch_size:
+    value: 32
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-small-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float32
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.49.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51865
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/output.log b/wandb/run-20250212_152506-cp47eoxt/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..d5181e7d9ff6e24c3fb944c5af3fc84ec3922a2c
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/files/output.log
@@ -0,0 +1,28 @@
+  0%|                                                                                       | 0/8000 [00:00<?, ?it/s]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
+  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
+[INFO|trainer_utils.py:837] 2025-02-12 15:25:12,786 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+  0%|▏                                                                           | 25/8000 [00:33<2:23:47,  1.08s/it]Traceback (most recent call last):
+{'loss': 2.3284, 'grad_norm': 17.581905364990234, 'learning_rate': 4.4e-07, 'epoch': 0.0}
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 719, in <module>
+    main()
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 668, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2512, in _inner_training_loop
+    tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 3662, in training_step
+    self.accelerator.backward(loss, **kwargs)
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/accelerate/accelerator.py", line 2242, in backward
+    self.scaler.scale(loss).backward(**kwargs)
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/_tensor.py", line 626, in backward
+    torch.autograd.backward(
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/autograd/__init__.py", line 347, in backward
+    _engine_run_backward(
+  File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/torch/autograd/graph.py", line 823, in _engine_run_backward
+    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+KeyboardInterrupt
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt b/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json b/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..741ca0342dc8378ca92566276e75c09582efae0d
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/files/wandb-metadata.json
@@ -0,0 +1,86 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T15:25:06.501811Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--audio_column_name=audio",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "315485667328"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json b/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..80f564210f160830a713856b51172daf9a4d37cf
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_step":0,"train/grad_norm":17.581905364990234,"train/learning_rate":4.4e-07,"train/epoch":0.003125,"train/global_step":25,"_timestamp":1.7393739409734626e+09,"_wandb":{"runtime":35},"_runtime":34.472001053,"train/loss":2.3284}
\ No newline at end of file
diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..988f517348f024820b907a90b8994b2652345846
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log
@@ -0,0 +1,13 @@
+{"time":"2025-02-12T15:25:06.32070089Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_o_4dslg/port-242535.txt","pid":242535,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T15:25:06.325654679Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":242535}
+{"time":"2025-02-12T15:25:06.325624039Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34421,"Zone":""}}
+{"time":"2025-02-12T15:25:06.495145129Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:06.504420484Z","level":"INFO","msg":"handleInformInit: received","streamId":"cp47eoxt","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:06.610655359Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"cp47eoxt","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:41.518158713Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:41.518235362Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:41.518248342Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-02-12T15:25:41.518365211Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:42.440985993Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:42.441028483Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:35824"}
+{"time":"2025-02-12T15:25:42.441053643Z","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..ddd19b9298efd912885b30af4c20522b4d39c052
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log
@@ -0,0 +1,15 @@
+{"time":"2025-02-12T15:25:06.504719321Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug-core.log"}
+{"time":"2025-02-12T15:25:06.61058157Z","level":"INFO","msg":"created new stream","id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:06.61064572Z","level":"INFO","msg":"stream: started","id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:06.610715339Z","level":"INFO","msg":"writer: Do: started","stream_id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:06.610734969Z","level":"INFO","msg":"handler: started","stream_id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:06.610881007Z","level":"INFO","msg":"sender: started","stream_id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:06.883150548Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-02-12T15:25:41.518262532Z","level":"INFO","msg":"stream: closing","id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:41.518304801Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-02-12T15:25:41.519096684Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-02-12T15:25:42.16914698Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-02-12T15:25:42.440671227Z","level":"INFO","msg":"handler: closed","stream_id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:42.440734176Z","level":"INFO","msg":"writer: Close: closed","stream_id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:42.440750356Z","level":"INFO","msg":"sender: closed","stream_id":"cp47eoxt"}
+{"time":"2025-02-12T15:25:42.440859685Z","level":"INFO","msg":"stream: closed","id":"cp47eoxt"}
diff --git a/wandb/run-20250212_152506-cp47eoxt/logs/debug.log b/wandb/run-20250212_152506-cp47eoxt/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..5a044527c50b212ff0a9c5959254662b8900d25c
--- /dev/null
+++ b/wandb/run-20250212_152506-cp47eoxt/logs/debug.log
@@ -0,0 +1,26 @@
+2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Configure stats pid to 242535
+2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 15:25:06,284 INFO    MainThread:242535 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug.log
+2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152506-cp47eoxt/logs/debug-internal.log
+2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:init():756] calling init triggers
+2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 15:25:06,285 INFO    MainThread:242535 [wandb_init.py:init():789] starting backend
+2025-02-12 15:25:06,495 INFO    MainThread:242535 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 15:25:06,501 INFO    MainThread:242535 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 15:25:06,501 INFO    MainThread:242535 [wandb_init.py:init():808] backend started and connected
+2025-02-12 15:25:06,503 INFO    MainThread:242535 [wandb_init.py:init():901] updated telemetry
+2025-02-12 15:25:06,511 INFO    MainThread:242535 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 15:25:06,880 INFO    MainThread:242535 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 15:25:06,988 INFO    MainThread:242535 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 15:25:06,990 INFO    MainThread:242535 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 15:25:06,991 INFO    MainThread:242535 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-24-15_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 15:25:06,993 INFO    MainThread:242535 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x730850e96e70>>
+2025-02-12 15:25:06,993 INFO    MainThread:242535 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
+2025-02-12 15:25:41,518 WARNING MsgRouterThr:242535 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb b/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..cbcfbc2adf86bb17c7862166061d04cf4fb5218e
Binary files /dev/null and b/wandb/run-20250212_152506-cp47eoxt/run-cp47eoxt.wandb differ
diff --git a/wandb/run-20250212_152709-lejyafmi/files/output.log b/wandb/run-20250212_152709-lejyafmi/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..0942859f01dea08787151d6c28034228a0faaf48
--- /dev/null
+++ b/wandb/run-20250212_152709-lejyafmi/files/output.log
@@ -0,0 +1,599 @@
+  0%|                                                                                                                                               | 0/8000 [00:00<?, ?it/s]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
+  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
+[INFO|trainer_utils.py:837] 2025-02-12 15:27:14,718 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+                                                                                                                                                                             
+{'loss': 2.3284, 'grad_norm': 17.579944610595703, 'learning_rate': 4.4e-07, 'epoch': 0.0}
+{'loss': 1.9145, 'grad_norm': 9.753120422363281, 'learning_rate': 9.400000000000001e-07, 'epoch': 0.01}
+{'loss': 1.2892, 'grad_norm': 9.469987869262695, 'learning_rate': 1.44e-06, 'epoch': 0.01}
+{'loss': 0.9797, 'grad_norm': 6.952774524688721, 'learning_rate': 1.94e-06, 'epoch': 0.01}
+{'loss': 0.8265, 'grad_norm': 6.080902576446533, 'learning_rate': 2.4400000000000004e-06, 'epoch': 0.02}
+{'loss': 0.6998, 'grad_norm': 5.6766037940979, 'learning_rate': 2.9400000000000002e-06, 'epoch': 0.02}
+{'loss': 0.6537, 'grad_norm': 5.372249126434326, 'learning_rate': 3.44e-06, 'epoch': 0.02}
+{'loss': 0.6149, 'grad_norm': 5.710323810577393, 'learning_rate': 3.94e-06, 'epoch': 0.03}
+{'loss': 0.5256, 'grad_norm': 5.235953330993652, 'learning_rate': 4.440000000000001e-06, 'epoch': 0.03}
+{'loss': 0.54, 'grad_norm': 6.58635950088501, 'learning_rate': 4.94e-06, 'epoch': 0.03}
+{'loss': 0.5521, 'grad_norm': 5.4912004470825195, 'learning_rate': 5.4400000000000004e-06, 'epoch': 0.03}
+{'loss': 0.5379, 'grad_norm': 5.846869945526123, 'learning_rate': 5.94e-06, 'epoch': 0.04}
+{'loss': 0.4778, 'grad_norm': 5.060309410095215, 'learning_rate': 6.440000000000001e-06, 'epoch': 0.04}
+{'loss': 0.4152, 'grad_norm': 5.06487512588501, 'learning_rate': 6.9400000000000005e-06, 'epoch': 0.04}
+{'loss': 0.3547, 'grad_norm': 4.936045169830322, 'learning_rate': 7.440000000000001e-06, 'epoch': 0.05}
+{'loss': 0.3428, 'grad_norm': 3.8072471618652344, 'learning_rate': 7.94e-06, 'epoch': 0.05}
+{'loss': 0.3099, 'grad_norm': 3.9378795623779297, 'learning_rate': 8.44e-06, 'epoch': 0.05}
+{'loss': 0.2963, 'grad_norm': 3.732869863510132, 'learning_rate': 8.94e-06, 'epoch': 0.06}
+{'loss': 0.2745, 'grad_norm': 3.9596025943756104, 'learning_rate': 9.440000000000001e-06, 'epoch': 0.06}
+{'loss': 0.2626, 'grad_norm': 3.428398370742798, 'learning_rate': 9.940000000000001e-06, 'epoch': 0.06}
+{'loss': 0.2411, 'grad_norm': 5.03747034072876, 'learning_rate': 9.970666666666668e-06, 'epoch': 0.07}
+{'loss': 0.2389, 'grad_norm': 3.2012217044830322, 'learning_rate': 9.937333333333334e-06, 'epoch': 0.07}
+{'loss': 0.2217, 'grad_norm': 3.7361278533935547, 'learning_rate': 9.904e-06, 'epoch': 0.07}
+{'loss': 0.2246, 'grad_norm': 4.509885787963867, 'learning_rate': 9.870666666666667e-06, 'epoch': 0.07}
+{'loss': 0.199, 'grad_norm': 3.462961435317993, 'learning_rate': 9.837333333333335e-06, 'epoch': 0.08}
+{'loss': 0.2156, 'grad_norm': 2.764691114425659, 'learning_rate': 9.804000000000001e-06, 'epoch': 0.08}
+{'loss': 0.212, 'grad_norm': 3.059408187866211, 'learning_rate': 9.770666666666668e-06, 'epoch': 0.08}
+{'loss': 0.2123, 'grad_norm': 3.952425718307495, 'learning_rate': 9.737333333333334e-06, 'epoch': 0.09}
+{'loss': 0.2343, 'grad_norm': 4.892609119415283, 'learning_rate': 9.704e-06, 'epoch': 0.09}
+{'loss': 0.3308, 'grad_norm': 4.592615127563477, 'learning_rate': 9.670666666666667e-06, 'epoch': 0.09}
+{'loss': 0.3146, 'grad_norm': 4.663967132568359, 'learning_rate': 9.637333333333333e-06, 'epoch': 0.1}
+{'loss': 0.3519, 'grad_norm': 5.091048717498779, 'learning_rate': 9.604000000000002e-06, 'epoch': 0.1}
+{'loss': 0.2365, 'grad_norm': 3.8216071128845215, 'learning_rate': 9.570666666666666e-06, 'epoch': 0.1}
+{'loss': 0.193, 'grad_norm': 3.122516393661499, 'learning_rate': 9.537333333333334e-06, 'epoch': 0.11}
+{'loss': 0.1759, 'grad_norm': 2.657339096069336, 'learning_rate': 9.504e-06, 'epoch': 0.11}
+{'loss': 0.2387, 'grad_norm': 4.554510116577148, 'learning_rate': 9.470666666666667e-06, 'epoch': 0.11}
+{'loss': 0.2845, 'grad_norm': 5.045220851898193, 'learning_rate': 9.437333333333334e-06, 'epoch': 0.12}
+{'loss': 0.2755, 'grad_norm': 4.260054588317871, 'learning_rate': 9.404e-06, 'epoch': 0.12}
+{'loss': 0.481, 'grad_norm': 5.8209147453308105, 'learning_rate': 9.370666666666668e-06, 'epoch': 0.12}
+{'loss': 0.3998, 'grad_norm': 5.498444557189941, 'learning_rate': 9.337333333333335e-06, 'epoch': 0.12}
+***** Running Evaluation *****
+[INFO|trainer.py:4180] 2025-02-12 15:47:16,534 >>   Num examples: Unknown
+[INFO|trainer.py:4181] 2025-02-12 15:47:16,534 >>   Batch size = 16
+[INFO|trainer_utils.py:837] 2025-02-12 15:47:24,994 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+[WARNING|logging.py:329] 2025-02-12 15:47:25,085 >> Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:25,197 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[WARNING|logging.py:329] 2025-02-12 15:47:25,198 >> The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:26,487 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:27,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:28,903 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:29,960 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:31,125 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:32,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:33,355 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:34,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:35,741 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:36,871 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:37,975 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:39,025 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:39,971 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:41,203 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:42,188 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:43,134 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:44,323 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:45,270 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:46,187 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:47,189 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:48,181 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:49,143 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:50,185 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:51,161 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:52,183 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:53,261 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:54,446 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:55,377 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:56,390 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:57,470 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:58,505 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:47:59,581 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:00,601 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:01,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:02,774 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:03,761 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:04,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:05,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:06,875 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:07,888 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:08,928 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:09,869 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:10,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:11,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:12,891 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:13,888 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:14,855 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:15,977 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:16,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:19,165 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:20,241 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:21,179 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:22,187 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:23,191 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:24,292 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:25,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:26,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:27,247 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:28,207 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:29,289 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:30,296 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:31,338 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:32,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:33,286 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:34,193 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:35,205 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:36,200 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:37,273 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:38,271 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:39,272 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:40,397 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:41,425 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:42,511 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:43,561 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:44,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:45,566 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:46,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:47,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:48,586 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:49,564 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:50,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:51,564 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:52,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:53,647 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:54,606 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:55,632 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:56,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:57,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:58,939 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:48:59,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:00,958 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:01,925 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:02,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:04,023 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:05,076 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:06,086 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:07,100 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:08,098 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:09,138 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:10,195 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:11,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:12,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:13,333 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:14,460 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:15,491 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:16,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:17,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:18,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:19,544 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:20,575 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:21,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:22,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:23,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:24,520 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:25,491 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:26,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:27,608 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:28,604 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:29,597 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:30,632 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:31,579 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:32,624 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:33,643 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:34,623 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:35,589 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:36,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:37,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:38,570 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:39,576 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:40,578 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 15:49:41,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+ 12%|████████████████▎                                                                                                                 | 1000/8000 [22:39<2:10:54,  1.12s/it][INFO|trainer.py:3860] 2025-02-12 15:49:49,799 >> Saving model checkpoint to ./checkpoint-1000
+{'eval_loss': 0.36512792110443115, 'eval_wer': 21.50135552023932, 'eval_runtime': 153.2646, 'eval_samples_per_second': 13.728, 'eval_steps_per_second': 0.861, 'epoch': 0.12}
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:2810: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.
+  warnings.warn(
+[INFO|configuration_utils.py:423] 2025-02-12 15:49:49,801 >> Configuration saved in ./checkpoint-1000/config.json
+[INFO|configuration_utils.py:906] 2025-02-12 15:49:49,802 >> Configuration saved in ./checkpoint-1000/generation_config.json
+[INFO|modeling_utils.py:3040] 2025-02-12 15:49:51,193 >> Model weights saved in ./checkpoint-1000/model.safetensors
+[INFO|feature_extraction_utils.py:437] 2025-02-12 15:49:51,195 >> Feature extractor saved in ./checkpoint-1000/preprocessor_config.json
+[INFO|feature_extraction_utils.py:437] 2025-02-12 15:49:54,577 >> Feature extractor saved in ./preprocessor_config.json
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
+  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
+ 13%|████████████████▏                                                                                                                | 1001/8000 [22:48<96:09:09, 49.46s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+02/12/2025 15:49:59 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+ 13%|████████████████▍                                                                                                                 | 1015/8000 [23:04<2:58:40,  1.53s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+                                                                                                                                                                             
+{'loss': 0.329, 'grad_norm': 4.732964515686035, 'learning_rate': 9.304000000000001e-06, 'epoch': 0.13}
+{'loss': 0.2319, 'grad_norm': 3.3556125164031982, 'learning_rate': 9.270666666666667e-06, 'epoch': 0.13}
+{'loss': 0.174, 'grad_norm': 2.9708847999572754, 'learning_rate': 9.237333333333334e-06, 'epoch': 0.13}
+{'loss': 0.1447, 'grad_norm': 2.841306447982788, 'learning_rate': 9.204e-06, 'epoch': 0.14}
+{'loss': 0.1406, 'grad_norm': 2.7909176349639893, 'learning_rate': 9.170666666666668e-06, 'epoch': 0.14}
+{'loss': 0.151, 'grad_norm': 3.37842059135437, 'learning_rate': 9.137333333333333e-06, 'epoch': 0.14}
+{'loss': 0.1529, 'grad_norm': 3.023977041244507, 'learning_rate': 9.104000000000001e-06, 'epoch': 0.15}
+{'loss': 0.1496, 'grad_norm': 3.015974283218384, 'learning_rate': 9.070666666666668e-06, 'epoch': 0.15}
+{'loss': 0.219, 'grad_norm': 4.30889892578125, 'learning_rate': 9.037333333333334e-06, 'epoch': 0.15}
+{'loss': 0.238, 'grad_norm': 4.160729885101318, 'learning_rate': 9.004e-06, 'epoch': 0.16}
+{'loss': 0.2603, 'grad_norm': 4.687659740447998, 'learning_rate': 8.970666666666667e-06, 'epoch': 0.16}
+{'loss': 0.2666, 'grad_norm': 4.577232837677002, 'learning_rate': 8.937333333333335e-06, 'epoch': 0.16}
+{'loss': 0.2337, 'grad_norm': 5.091732501983643, 'learning_rate': 8.904e-06, 'epoch': 0.17}
+{'loss': 0.2379, 'grad_norm': 4.125801086425781, 'learning_rate': 8.870666666666668e-06, 'epoch': 0.17}
+{'loss': 0.2215, 'grad_norm': 5.142183303833008, 'learning_rate': 8.837333333333334e-06, 'epoch': 0.17}
+{'loss': 0.2136, 'grad_norm': 4.486277103424072, 'learning_rate': 8.804e-06, 'epoch': 0.17}
+{'loss': 0.2214, 'grad_norm': 3.5466482639312744, 'learning_rate': 8.770666666666667e-06, 'epoch': 0.18}
+{'loss': 0.2113, 'grad_norm': 3.6199097633361816, 'learning_rate': 8.737333333333334e-06, 'epoch': 0.18}
+{'loss': 0.1552, 'grad_norm': 2.559951066970825, 'learning_rate': 8.704e-06, 'epoch': 0.18}
+{'loss': 0.1354, 'grad_norm': 2.9152133464813232, 'learning_rate': 8.670666666666666e-06, 'epoch': 0.19}
+{'loss': 0.144, 'grad_norm': 2.608732223510742, 'learning_rate': 8.637333333333335e-06, 'epoch': 0.19}
+{'loss': 0.1367, 'grad_norm': 4.0043416023254395, 'learning_rate': 8.604000000000001e-06, 'epoch': 0.19}
+{'loss': 0.1194, 'grad_norm': 2.3621206283569336, 'learning_rate': 8.570666666666667e-06, 'epoch': 0.2}
+{'loss': 0.1283, 'grad_norm': 2.6970181465148926, 'learning_rate': 8.537333333333334e-06, 'epoch': 0.2}
+{'loss': 0.1858, 'grad_norm': 4.737370014190674, 'learning_rate': 8.504000000000002e-06, 'epoch': 0.2}
+{'loss': 0.1995, 'grad_norm': 3.462738513946533, 'learning_rate': 8.470666666666667e-06, 'epoch': 0.21}
+{'loss': 0.2028, 'grad_norm': 4.608364582061768, 'learning_rate': 8.437333333333335e-06, 'epoch': 0.21}
+{'loss': 0.1952, 'grad_norm': 2.770601987838745, 'learning_rate': 8.404000000000001e-06, 'epoch': 0.21}
+{'loss': 0.1464, 'grad_norm': 3.041656017303467, 'learning_rate': 8.370666666666668e-06, 'epoch': 0.22}
+{'loss': 0.1424, 'grad_norm': 2.988032102584839, 'learning_rate': 8.337333333333334e-06, 'epoch': 0.22}
+{'loss': 0.1233, 'grad_norm': 3.0646026134490967, 'learning_rate': 8.304e-06, 'epoch': 0.22}
+{'loss': 0.1384, 'grad_norm': 2.617403268814087, 'learning_rate': 8.270666666666667e-06, 'epoch': 0.23}
+{'loss': 0.1208, 'grad_norm': 2.6170425415039062, 'learning_rate': 8.237333333333333e-06, 'epoch': 0.23}
+{'loss': 0.1176, 'grad_norm': 2.1296098232269287, 'learning_rate': 8.204000000000001e-06, 'epoch': 0.23}
+{'loss': 0.1189, 'grad_norm': 2.767275810241699, 'learning_rate': 8.170666666666668e-06, 'epoch': 0.23}
+{'loss': 0.1211, 'grad_norm': 2.7053661346435547, 'learning_rate': 8.137333333333334e-06, 'epoch': 0.24}
+{'loss': 0.1156, 'grad_norm': 2.281399965286255, 'learning_rate': 8.104e-06, 'epoch': 0.24}
+{'loss': 0.1517, 'grad_norm': 3.7013635635375977, 'learning_rate': 8.070666666666667e-06, 'epoch': 0.24}
+{'loss': 0.2002, 'grad_norm': 3.7125532627105713, 'learning_rate': 8.037333333333334e-06, 'epoch': 0.25}
+{'loss': 0.1975, 'grad_norm': 3.8716859817504883, 'learning_rate': 8.004e-06, 'epoch': 0.25}
+***** Running Evaluation *****
+[INFO|trainer.py:4180] 2025-02-12 16:09:35,224 >>   Num examples: Unknown
+[INFO|trainer.py:4181] 2025-02-12 16:09:35,311 >>   Batch size = 16
+[INFO|trainer_utils.py:837] 2025-02-12 16:09:44,012 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:44,213 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:45,353 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:46,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:48,009 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:49,122 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:50,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:51,439 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:52,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:53,705 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:54,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:55,943 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:56,994 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:57,978 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:58,906 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:09:59,962 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:00,881 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:01,789 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:02,826 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:03,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:04,701 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:05,692 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:06,679 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:07,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:08,636 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:09,613 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:10,609 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:11,670 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:12,812 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:13,765 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:14,780 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:15,841 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:16,909 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:17,972 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:18,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:20,066 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:21,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:22,101 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:23,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:24,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:26,400 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:27,375 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:28,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:29,342 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:30,332 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:31,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:32,379 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:33,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:34,359 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:35,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:36,544 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:37,617 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:38,677 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:39,605 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:40,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:41,648 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:42,760 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:43,678 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:44,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:45,707 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:46,693 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:47,755 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:48,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:49,767 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:50,800 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:51,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:52,626 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:53,649 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:54,655 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:55,662 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:56,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:57,658 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:58,766 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:10:59,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:00,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:01,876 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:02,925 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:03,865 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:04,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:05,883 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:06,855 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:07,827 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:08,786 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:09,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:10,854 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:11,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:12,879 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:13,904 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:15,016 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:16,066 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:17,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:18,230 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:19,257 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:20,246 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:21,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:22,318 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:23,359 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:24,378 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:25,394 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:26,375 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:27,419 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:28,474 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:29,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:30,540 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:31,620 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:32,753 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:33,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:34,847 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:35,944 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:36,936 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:37,905 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:38,956 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:40,009 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:41,004 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:41,938 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:42,935 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:43,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:44,962 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:46,048 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:47,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:48,088 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:49,116 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:50,045 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:51,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:52,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:53,118 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:54,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:55,060 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:56,087 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:57,119 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:58,152 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:11:59,142 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:12:00,092 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+ 25%|████████████████████████████████▌                                                                                                 | 2000/8000 [44:57<1:52:37,  1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:12:08,401 >> Saving model checkpoint to ./checkpoint-2000
+{'eval_loss': 0.2918355464935303, 'eval_wer': 15.873609423202767, 'eval_runtime': 153.1763, 'eval_samples_per_second': 13.736, 'eval_steps_per_second': 0.862, 'epoch': 0.25}
+[INFO|configuration_utils.py:423] 2025-02-12 16:12:08,403 >> Configuration saved in ./checkpoint-2000/config.json
+[INFO|configuration_utils.py:906] 2025-02-12 16:12:08,403 >> Configuration saved in ./checkpoint-2000/generation_config.json
+[INFO|modeling_utils.py:3040] 2025-02-12 16:12:09,828 >> Model weights saved in ./checkpoint-2000/model.safetensors
+[INFO|feature_extraction_utils.py:437] 2025-02-12 16:12:09,829 >> Feature extractor saved in ./checkpoint-2000/preprocessor_config.json
+[INFO|feature_extraction_utils.py:437] 2025-02-12 16:12:13,102 >> Feature extractor saved in ./preprocessor_config.json
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
+  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
+ 25%|████████████████████████████████▎                                                                                                | 2001/8000 [45:05<81:49:55, 49.11s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+02/12/2025 16:12:17 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+ 25%|████████████████████████████████▋                                                                                                 | 2013/8000 [45:24<3:17:54,  1.98s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+                                                                                                                                                                             
+{'loss': 0.1648, 'grad_norm': 2.4911813735961914, 'learning_rate': 7.970666666666668e-06, 'epoch': 0.25}
+{'loss': 0.1162, 'grad_norm': 2.604146718978882, 'learning_rate': 7.937333333333333e-06, 'epoch': 0.26}
+{'loss': 0.1135, 'grad_norm': 2.7352280616760254, 'learning_rate': 7.904000000000001e-06, 'epoch': 0.26}
+{'loss': 0.1153, 'grad_norm': 2.2932169437408447, 'learning_rate': 7.870666666666667e-06, 'epoch': 0.26}
+{'loss': 0.1005, 'grad_norm': 3.1734797954559326, 'learning_rate': 7.837333333333334e-06, 'epoch': 0.27}
+{'loss': 0.0988, 'grad_norm': 2.4353103637695312, 'learning_rate': 7.804e-06, 'epoch': 0.27}
+{'loss': 0.1028, 'grad_norm': 2.8655478954315186, 'learning_rate': 7.770666666666668e-06, 'epoch': 0.27}
+{'loss': 0.1751, 'grad_norm': 3.800967216491699, 'learning_rate': 7.737333333333335e-06, 'epoch': 0.28}
+{'loss': 0.1798, 'grad_norm': 4.212419509887695, 'learning_rate': 7.704000000000001e-06, 'epoch': 0.28}
+{'loss': 0.199, 'grad_norm': 3.5863020420074463, 'learning_rate': 7.670666666666668e-06, 'epoch': 0.28}
+{'loss': 0.1335, 'grad_norm': 3.1013996601104736, 'learning_rate': 7.637333333333334e-06, 'epoch': 0.28}
+{'loss': 0.0976, 'grad_norm': 2.2462713718414307, 'learning_rate': 7.604e-06, 'epoch': 0.29}
+{'loss': 0.0946, 'grad_norm': 2.9669203758239746, 'learning_rate': 7.570666666666668e-06, 'epoch': 0.29}
+{'loss': 0.0935, 'grad_norm': 2.645289897918701, 'learning_rate': 7.537333333333334e-06, 'epoch': 0.29}
+{'loss': 0.1045, 'grad_norm': 1.9715274572372437, 'learning_rate': 7.5040000000000005e-06, 'epoch': 0.3}
+{'loss': 0.0977, 'grad_norm': 2.1423373222351074, 'learning_rate': 7.470666666666667e-06, 'epoch': 0.3}
+{'loss': 0.1061, 'grad_norm': 2.029958963394165, 'learning_rate': 7.437333333333334e-06, 'epoch': 0.3}
+{'loss': 0.0998, 'grad_norm': 1.972732663154602, 'learning_rate': 7.404e-06, 'epoch': 0.31}
+{'loss': 0.1068, 'grad_norm': 2.2875239849090576, 'learning_rate': 7.370666666666667e-06, 'epoch': 0.31}
+{'loss': 0.1168, 'grad_norm': 3.1778981685638428, 'learning_rate': 7.337333333333334e-06, 'epoch': 0.31}
+{'loss': 0.1524, 'grad_norm': 3.360576868057251, 'learning_rate': 7.304000000000001e-06, 'epoch': 0.32}
+{'loss': 0.1483, 'grad_norm': 3.5467047691345215, 'learning_rate': 7.270666666666667e-06, 'epoch': 0.32}
+{'loss': 0.1775, 'grad_norm': 3.488696575164795, 'learning_rate': 7.237333333333334e-06, 'epoch': 0.32}
+{'loss': 0.135, 'grad_norm': 2.8800296783447266, 'learning_rate': 7.204000000000001e-06, 'epoch': 0.33}
+{'loss': 0.1108, 'grad_norm': 3.1020660400390625, 'learning_rate': 7.170666666666667e-06, 'epoch': 0.33}
+{'loss': 0.1002, 'grad_norm': 2.1233720779418945, 'learning_rate': 7.137333333333334e-06, 'epoch': 0.33}
+{'loss': 0.0941, 'grad_norm': 2.393425703048706, 'learning_rate': 7.104000000000001e-06, 'epoch': 0.33}
+{'loss': 0.0959, 'grad_norm': 2.295924186706543, 'learning_rate': 7.0706666666666665e-06, 'epoch': 0.34}
+{'loss': 0.1116, 'grad_norm': 1.8125039339065552, 'learning_rate': 7.037333333333334e-06, 'epoch': 0.34}
+{'loss': 0.1146, 'grad_norm': 3.006834030151367, 'learning_rate': 7.004000000000001e-06, 'epoch': 0.34}
+{'loss': 0.2029, 'grad_norm': 4.171006679534912, 'learning_rate': 6.970666666666667e-06, 'epoch': 0.35}
+{'loss': 0.1913, 'grad_norm': 3.68646240234375, 'learning_rate': 6.937333333333334e-06, 'epoch': 0.35}
+{'loss': 0.16, 'grad_norm': 3.7463300228118896, 'learning_rate': 6.904e-06, 'epoch': 0.35}
+{'loss': 0.1571, 'grad_norm': 3.069136381149292, 'learning_rate': 6.8706666666666676e-06, 'epoch': 0.36}
+{'loss': 0.1608, 'grad_norm': 3.17172908782959, 'learning_rate': 6.837333333333334e-06, 'epoch': 0.36}
+{'loss': 0.1546, 'grad_norm': 3.1673102378845215, 'learning_rate': 6.804e-06, 'epoch': 0.36}
+{'loss': 0.1282, 'grad_norm': 2.344193935394287, 'learning_rate': 6.770666666666668e-06, 'epoch': 0.37}
+{'loss': 0.0979, 'grad_norm': 2.5321226119995117, 'learning_rate': 6.737333333333333e-06, 'epoch': 0.37}
+{'loss': 0.1049, 'grad_norm': 2.2652363777160645, 'learning_rate': 6.7040000000000005e-06, 'epoch': 0.37}
+{'loss': 0.1433, 'grad_norm': 2.7856993675231934, 'learning_rate': 6.670666666666668e-06, 'epoch': 0.38}
+***** Running Evaluation *****
+[INFO|trainer.py:4180] 2025-02-12 16:32:16,805 >>   Num examples: Unknown
+[INFO|trainer.py:4181] 2025-02-12 16:32:16,805 >>   Batch size = 16
+[INFO|trainer_utils.py:837] 2025-02-12 16:32:24,994 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:25,165 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:26,282 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:27,776 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:29,144 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:30,273 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:31,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:32,739 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:33,771 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:34,884 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:35,931 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:36,990 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:38,037 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:39,057 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:39,996 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:41,081 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:42,014 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:42,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:43,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:44,890 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:45,808 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:46,811 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:47,793 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:48,768 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:49,750 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:50,750 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:51,749 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:52,812 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:53,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:54,901 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:55,908 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:56,978 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:58,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:32:59,089 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:00,086 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:01,176 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:02,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:03,217 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:04,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:05,324 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:07,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:08,515 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:09,566 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:10,508 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:11,497 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:12,472 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:13,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:14,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:15,525 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:16,645 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:17,663 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:18,689 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:19,731 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:20,661 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:21,675 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:22,678 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:23,752 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:24,673 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:25,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:26,653 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:27,622 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:28,693 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:29,685 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:30,756 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:31,779 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:32,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:33,577 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:34,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:35,631 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:36,667 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:37,676 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:38,674 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:39,764 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:40,785 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:41,856 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:42,864 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:43,891 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:44,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:45,810 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:46,829 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:47,795 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:48,759 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:49,713 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:50,734 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:51,773 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:52,802 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:53,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:54,763 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:55,881 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:56,931 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:58,078 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:33:59,112 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:00,159 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:01,125 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:02,082 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:03,167 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:04,200 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:05,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:06,211 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:07,179 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:08,207 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:09,226 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:10,272 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:11,252 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:12,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:13,418 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:14,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:15,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:16,531 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:17,519 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:18,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:19,486 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:20,519 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:21,470 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:22,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:23,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:24,377 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:25,447 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:26,486 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:27,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:28,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:29,488 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:30,428 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:31,462 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:32,460 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:33,428 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:34,392 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:35,367 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:36,383 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:37,360 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:38,347 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:39,325 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 16:34:40,266 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+ 38%|████████████████████████████████████████████████                                                                                | 3000/8000 [1:07:37<1:34:23,  1.13s/it][INFO|trainer.py:3860] 2025-02-12 16:34:48,564 >> Saving model checkpoint to ./checkpoint-3000
+{'eval_loss': 0.2720916271209717, 'eval_wer': 13.9010937646069, 'eval_runtime': 151.7576, 'eval_samples_per_second': 13.864, 'eval_steps_per_second': 0.87, 'epoch': 0.38}
+[INFO|configuration_utils.py:423] 2025-02-12 16:34:48,565 >> Configuration saved in ./checkpoint-3000/config.json
+[INFO|configuration_utils.py:906] 2025-02-12 16:34:48,566 >> Configuration saved in ./checkpoint-3000/generation_config.json
+[INFO|modeling_utils.py:3040] 2025-02-12 16:34:49,987 >> Model weights saved in ./checkpoint-3000/model.safetensors
+[INFO|feature_extraction_utils.py:437] 2025-02-12 16:34:49,988 >> Feature extractor saved in ./checkpoint-3000/preprocessor_config.json
+[INFO|feature_extraction_utils.py:437] 2025-02-12 16:34:53,620 >> Feature extractor saved in ./preprocessor_config.json
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:87: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
+  return torch.tensor(value, **{**default_dtype, **self.torch_tensor_kwargs})
+ 38%|███████████████████████████████████████████████▋                                                                               | 3001/8000 [1:07:45<67:42:47, 48.76s/it]It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+02/12/2025 16:34:57 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+ 38%|████████████████████████████████████████████████▏                                                                               | 3009/8000 [1:07:56<5:31:43,  3.99s/it]/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+ 39%|█████████████████████████████████████████████████▌                                                                              | 3096/8000 [1:09:39<1:34:36,  1.16s/it]
+{'loss': 0.1758, 'grad_norm': 4.214677810668945, 'learning_rate': 6.637333333333333e-06, 'epoch': 0.38}
+{'loss': 0.1972, 'grad_norm': 4.144543647766113, 'learning_rate': 6.604000000000001e-06, 'epoch': 0.38}
+{'loss': 0.1293, 'grad_norm': 2.1775295734405518, 'learning_rate': 6.570666666666667e-06, 'epoch': 0.38}
diff --git a/wandb/run-20250212_152709-lejyafmi/files/requirements.txt b/wandb/run-20250212_152709-lejyafmi/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e48b00f9c3b676c687ab25917704b36ec3471d
--- /dev/null
+++ b/wandb/run-20250212_152709-lejyafmi/files/requirements.txt
@@ -0,0 +1,115 @@
+aiosignal==1.3.2
+Markdown==3.7
+more-itertools==10.6.0
+requests==2.32.3
+sentry-sdk==2.21.0
+torchaudio==2.6.0
+charset-normalizer==3.4.1
+docker-pycreds==0.4.0
+nvidia-cusolver-cu12==11.6.1.9
+PyYAML==6.0.2
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+setuptools==75.8.0
+nvidia-cufft-cu12==11.2.1.3
+joblib==1.4.2
+pytz==2025.1
+pip==24.0
+scikit-learn==1.6.1
+certifi==2025.1.31
+jiwer==3.1.0
+regex==2024.11.6
+annotated-types==0.7.0
+grpcio==1.70.0
+msgpack==1.1.0
+mpmath==1.3.0
+nvidia-cudnn-cu12==9.1.0.70
+soundfile==0.13.1
+dill==0.3.8
+nvidia-nvtx-cu12==12.4.127
+six==1.17.0
+nvidia-cuda-cupti-cu12==12.4.127
+pyarrow==19.0.0
+nvidia-nccl-cu12==2.21.5
+psutil==6.1.1
+decorator==5.1.1
+llvmlite==0.44.0
+frozenlist==1.5.0
+pydantic==2.10.6
+networkx==3.4.2
+idna==3.10
+wandb==0.19.6
+aiohttp==3.11.12
+RapidFuzz==3.12.1
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+numpy==2.1.3
+tokenizers==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+typing_extensions==4.12.2
+urllib3==2.3.0
+setproctitle==1.3.4
+tzdata==2025.1
+sympy==1.13.1
+pooch==1.8.2
+click==8.1.8
+pydantic_core==2.27.2
+MarkupSafe==3.0.2
+scipy==1.15.1
+accelerate==1.3.0
+tensorboard==2.19.0
+protobuf==5.29.3
+gitdb==4.0.12
+smmap==5.0.2
+absl-py==2.1.0
+tqdm==4.67.1
+yarl==1.18.3
+pycparser==2.22
+nvidia-cusparse-cu12==12.3.1.170
+attrs==25.1.0
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+GitPython==3.1.44
+safetensors==0.5.2
+fsspec==2024.12.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+filelock==3.17.0
+aiohappyeyeballs==2.4.6
+packaging==24.2
+datasets==3.2.1.dev0
+audioread==3.0.1
+propcache==0.2.1
+transformers==4.49.0.dev0
+nvidia-cuda-runtime-cu12==12.4.127
+cffi==1.17.1
+evaluate==0.4.3
+Werkzeug==3.1.3
+huggingface-hub==0.28.1
+Jinja2==3.1.5
+torch==2.6.0
+nvidia-curand-cu12==10.3.5.147
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+nvidia-cublas-cu12==12.4.5.8
+nvidia-nvjitlink-cu12==12.4.127
+triton==3.2.0
+numba==0.61.0
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+typeguard==4.3.0
+more-itertools==10.3.0
+tomli==2.0.1
+autocommand==2.2.2
+zipp==3.19.2
+typing_extensions==4.12.2
+backports.tarfile==1.2.0
+inflect==7.3.1
+jaraco.text==3.12.1
+wheel==0.43.0
+packaging==24.2
+jaraco.collections==5.1.0
+jaraco.functools==4.0.1
+jaraco.context==5.3.0
diff --git a/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json b/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..15c28b0022a20f8df79ea54c97b8c208b5b9a6cb
--- /dev/null
+++ b/wandb/run-20250212_152709-lejyafmi/files/wandb-metadata.json
@@ -0,0 +1,86 @@
+{
+  "os":  "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
+  "python":  "CPython 3.12.3",
+  "startedAt":  "2025-02-12T15:27:10.113915Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-small",
+    "--dataset_name=asierhv/composite_corpus_eu_v2.1",
+    "--language=basque",
+    "--train_split_name=train",
+    "--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
+    "--model_index_name=Whisper Small Basque",
+    "--max_steps=8000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=32",
+    "--per_device_eval_batch_size=16",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=1e-5",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=1000",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=225",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--audio_column_name=audio",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--use_auth_token",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-small-eu"
+  ],
+  "program":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-small-eu",
+    "commit":  "9c975864b20b4df94398a870e97cad2934253ec3"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/xezpeleta/whisper/whisper-small-eu",
+  "host":  "tknika",
+  "executable":  "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "NVIDIA L40-48Q",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "525987168256",
+      "used":  "315242278912"
+    }
+  },
+  "memory":  {
+    "total":  "33654022144"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}
\ No newline at end of file
diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log b/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..b6d594cdf89ccbf6c593c3f1737e25809a758e18
--- /dev/null
+++ b/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log
@@ -0,0 +1,6 @@
+{"time":"2025-02-12T15:27:09.931528058Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqvyk3k9m/port-243546.txt","pid":243546,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-02-12T15:27:09.940481802Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":243546}
+{"time":"2025-02-12T15:27:09.940461772Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38449,"Zone":""}}
+{"time":"2025-02-12T15:27:10.107162928Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:41928"}
+{"time":"2025-02-12T15:27:10.115855515Z","level":"INFO","msg":"handleInformInit: received","streamId":"lejyafmi","id":"127.0.0.1:41928"}
+{"time":"2025-02-12T15:27:10.220025051Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"lejyafmi","id":"127.0.0.1:41928"}
diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log b/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..3e76e3e81a1059aef65345804a4f269bcd9183ad
--- /dev/null
+++ b/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
@@ -0,0 +1,7 @@
+{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
+{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
+{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
diff --git a/wandb/run-20250212_152709-lejyafmi/logs/debug.log b/wandb/run-20250212_152709-lejyafmi/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..d6ccd57adcf2bbe370a51abbc03e1e4a2718eb85
--- /dev/null
+++ b/wandb/run-20250212_152709-lejyafmi/logs/debug.log
@@ -0,0 +1,25 @@
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():756] calling init triggers
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-02-12 15:27:09,901 INFO    MainThread:243546 [wandb_init.py:init():789] starting backend
+2025-02-12 15:27:10,107 INFO    MainThread:243546 [wandb_init.py:init():793] sending inform_init request
+2025-02-12 15:27:10,112 INFO    MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-02-12 15:27:10,113 INFO    MainThread:243546 [wandb_init.py:init():808] backend started and connected
+2025-02-12 15:27:10,115 INFO    MainThread:243546 [wandb_init.py:init():901] updated telemetry
+2025-02-12 15:27:10,122 INFO    MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
+2025-02-12 15:27:10,584 INFO    MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
+2025-02-12 15:27:10,691 INFO    MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
+2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
+2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
+2025-02-12 15:27:10,692 INFO    MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
+2025-02-12 15:27:10,694 INFO    MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
+2025-02-12 15:27:10,698 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
+2025-02-12 15:27:10,704 INFO    MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
diff --git a/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb b/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..85a75160903e28b22d9fe2b0e421adfde04e5700
--- /dev/null
+++ b/wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb62a7585cf0849764ceaa30b52c9724bf548b7d6e46953aeb87de5d7296753
+size 1474560