diff --git "a/hydra_train.log" "b/hydra_train.log"
--- "a/hydra_train.log"
+++ "b/hydra_train.log"
@@ -1,10 +1,10 @@
-[2024-06-08 13:22:07,990][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 30000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 18000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 10000, 'hold_steps': 0, 'decay_steps': 20000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 30000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}}
-[2024-06-08 13:22:07,993][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViASP-LLM_v1.0
-[2024-06-08 13:22:07,993][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True}
-[2024-06-08 13:22:09,819][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViASP-LLM_v1.0
-[2024-06-08 13:22:09,819][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False}
-[2024-06-08 13:22:09,823][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True}
-[2024-06-08 13:22:16,988][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count(
+[2024-06-14 02:11:08,447][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 30000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 18000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 10000, 'hold_steps': 0, 'decay_steps': 20000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 30000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}}
+[2024-06-14 02:11:08,450][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViASP-LLM_v1.0
+[2024-06-14 02:11:08,450][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True}
+[2024-06-14 02:11:09,466][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViASP-LLM_v1.0
+[2024-06-14 02:11:09,466][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False}
+[2024-06-14 02:11:09,469][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True}
+[2024-06-14 02:11:13,424][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count(
   (encoder): HubertEncoderWrapper(
     (w2v_model): AVHubertModel(
       (feature_extractor_audio): SubModel(
@@ -212,730 +212,718 @@
   )
   (avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True)
 )
-[2024-06-08 13:22:16,994][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask
-[2024-06-08 13:22:16,994][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count
-[2024-06-08 13:22:16,994][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss
-[2024-06-08 13:22:16,997][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424)
-[2024-06-08 13:22:16,999][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0)
-[2024-06-08 13:22:17,000][src.vsp_llm_training][INFO] - Using tokenizer
-[2024-06-08 13:22:17,019][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 12663, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=75
-[2024-06-08 13:22:17,808][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/valid.wrd is sequence label. skipped
-[2024-06-08 13:22:17,808][src.vsp_llm_dataset][INFO] - image transform: Compose(
+[2024-06-14 02:11:13,429][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask
+[2024-06-14 02:11:13,429][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count
+[2024-06-14 02:11:13,429][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss
+[2024-06-14 02:11:13,433][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424)
+[2024-06-14 02:11:13,435][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0)
+[2024-06-14 02:11:13,436][src.vsp_llm_training][INFO] - Using tokenizer
+[2024-06-14 02:11:13,472][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76
+[2024-06-14 02:11:13,799][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/valid.wrd is sequence label. skipped
+[2024-06-14 02:11:13,800][src.vsp_llm_dataset][INFO] - image transform: Compose(
     Normalize(mean=0.0, std=255.0)
-    <src.utils_vsp_llm.CenterCrop object at 0x78bc09a24820>
+    <src.utils_vsp_llm.CenterCrop object at 0x7f202061d7c0>
     Normalize(mean=0.421, std=0.165)
 )
-[2024-06-08 13:22:17,809][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
-[2024-06-08 13:22:17,809][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,959][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,960][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,961][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias
-[2024-06-08 13:22:17,962][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,963][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,964][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,965][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,966][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,967][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias
-[2024-06-08 13:22:17,968][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias
-[2024-06-08 13:22:17,968][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
-[2024-06-08 13:22:17,968][fairseq.utils][INFO] - rank   0: capabilities =  8.6  ; total memory = 15.731 GB ; name = NVIDIA RTX A4000                        
-[2024-06-08 13:22:17,968][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
-[2024-06-08 13:22:17,968][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs)
-[2024-06-08 13:22:17,969][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1
-[2024-06-08 13:22:17,969][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt
-[2024-06-08 13:22:17,969][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt
-[2024-06-08 13:22:17,969][fairseq.trainer][INFO] - loading train data for epoch 1
-[2024-06-08 13:22:17,969][src.vsp_llm_training][INFO] - Using tokenizer
-[2024-06-08 13:22:18,115][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 101180, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73
-[2024-06-08 13:22:18,465][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/train.wrd is sequence label. skipped
-[2024-06-08 13:22:18,465][src.vsp_llm_dataset][INFO] - image transform: Compose(
+[2024-06-14 02:11:13,800][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
+[2024-06-14 02:11:13,800][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
+[2024-06-14 02:11:13,926][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias
+[2024-06-14 02:11:13,927][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,928][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias
+[2024-06-14 02:11:13,929][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,930][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias
+[2024-06-14 02:11:13,931][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,932][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,933][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias
+[2024-06-14 02:11:13,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias
+[2024-06-14 02:11:13,935][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
+[2024-06-14 02:11:13,935][fairseq.utils][INFO] - rank   0: capabilities =  8.6  ; total memory = 15.729 GB ; name = NVIDIA RTX A4000                        
+[2024-06-14 02:11:13,935][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
+[2024-06-14 02:11:13,935][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs)
+[2024-06-14 02:11:13,935][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1
+[2024-06-14 02:11:13,935][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt
+[2024-06-14 02:11:13,936][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt
+[2024-06-14 02:11:13,936][fairseq.trainer][INFO] - loading train data for epoch 1
+[2024-06-14 02:11:13,936][src.vsp_llm_training][INFO] - Using tokenizer
+[2024-06-14 02:11:14,100][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 120686, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73
+[2024-06-14 02:11:14,947][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/train.wrd is sequence label. skipped
+[2024-06-14 02:11:14,947][src.vsp_llm_dataset][INFO] - image transform: Compose(
     Normalize(mean=0.0, std=255.0)
     RandomCrop(size=(88, 88))
-    <src.utils_vsp_llm.HorizontalFlip object at 0x78bbb79baa90>
+    <src.utils_vsp_llm.HorizontalFlip object at 0x7f2020b304f0>
     Normalize(mean=0.421, std=0.165)
 )
-[2024-06-08 13:22:18,465][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
-[2024-06-08 13:22:18,465][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
-[2024-06-08 13:22:23,141][fairseq.trainer][INFO] - begin training epoch 1
-[2024-06-08 13:22:23,141][fairseq_cli.train][INFO] - Start iterating over samples
-[2024-06-08 13:27:44,208][train_inner][INFO] - {"epoch": 1, "update": 0.016, "loss": "7.578", "ntokens": "128.135", "acc_total": "128.135", "n_correct": "18.82", "wer_total": "128.135", "n_error": "109.28", "ppl": "191.12", "accuracy": "14.688", "wer": "85.285", "wps": "80", "ups": "0.62", "wpb": "128.1", "bsz": "8", "num_updates": "200", "lr": "1.49e-05", "gnorm": "8.606", "loss_scale": "128", "train_wall": "320", "gb_free": "7.1", "wall": "326"}
-[2024-06-08 13:33:07,669][train_inner][INFO] - {"epoch": 1, "update": 0.032, "loss": "6.232", "ntokens": "128.27", "acc_total": "128.27", "n_correct": "25.92", "wer_total": "128.27", "n_error": "102.16", "ppl": "75.16", "accuracy": "20.207", "wer": "79.644", "wps": "79.3", "ups": "0.62", "wpb": "128.3", "bsz": "8", "num_updates": "400", "lr": "2.48e-05", "gnorm": "3.575", "loss_scale": "128", "train_wall": "323", "gb_free": "7.1", "wall": "650"}
-[2024-06-08 13:38:30,946][train_inner][INFO] - {"epoch": 1, "update": 0.047, "loss": "6.055", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "28.825", "wer_total": "127.235", "n_error": "98.1", "ppl": "66.49", "accuracy": "22.655", "wer": "77.101", "wps": "78.7", "ups": "0.62", "wpb": "127.2", "bsz": "8", "num_updates": "600", "lr": "3.47e-05", "gnorm": "3.892", "loss_scale": "128", "train_wall": "323", "gb_free": "7.1", "wall": "973"}
-[2024-06-08 13:43:54,238][train_inner][INFO] - {"epoch": 1, "update": 0.063, "loss": "5.952", "ntokens": "128.155", "acc_total": "128.155", "n_correct": "30.315", "wer_total": "128.155", "n_error": "97.545", "ppl": "61.93", "accuracy": "23.655", "wer": "76.115", "wps": "79.3", "ups": "0.62", "wpb": "128.2", "bsz": "8", "num_updates": "800", "lr": "4.46e-05", "gnorm": "3.945", "loss_scale": "128", "train_wall": "323", "gb_free": "7.1", "wall": "1296"}
-[2024-06-08 13:49:17,549][train_inner][INFO] - {"epoch": 1, "update": 0.079, "loss": "5.9", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "30.18", "wer_total": "127.52", "n_error": "97.04", "ppl": "59.72", "accuracy": "23.667", "wer": "76.098", "wps": "78.9", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "1000", "lr": "5.45e-05", "gnorm": "3.795", "loss_scale": "128", "train_wall": "323", "gb_free": "7.1", "wall": "1620"}
-[2024-06-08 13:54:40,998][train_inner][INFO] - {"epoch": 1, "update": 0.095, "loss": "5.865", "ntokens": "127.845", "acc_total": "127.845", "n_correct": "30.985", "wer_total": "127.845", "n_error": "96.63", "ppl": "58.28", "accuracy": "24.236", "wer": "75.584", "wps": "79.1", "ups": "0.62", "wpb": "127.8", "bsz": "8", "num_updates": "1200", "lr": "6.44e-05", "gnorm": "3.479", "loss_scale": "128", "train_wall": "323", "gb_free": "7.1", "wall": "1943"}
-[2024-06-08 14:00:04,433][train_inner][INFO] - {"epoch": 1, "update": 0.111, "loss": "5.832", "ntokens": "128.09", "acc_total": "128.09", "n_correct": "31.66", "wer_total": "128.09", "n_error": "96.195", "ppl": "56.96", "accuracy": "24.717", "wer": "75.1", "wps": "79.2", "ups": "0.62", "wpb": "128.1", "bsz": "8", "num_updates": "1400", "lr": "7.43e-05", "gnorm": "3.346", "loss_scale": "128", "train_wall": "323", "gb_free": "7.1", "wall": "2266"}
-[2024-06-08 14:05:27,634][train_inner][INFO] - {"epoch": 1, "update": 0.127, "loss": "5.829", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "31.615", "wer_total": "127.155", "n_error": "95.3", "ppl": "56.83", "accuracy": "24.863", "wer": "74.948", "wps": "78.7", "ups": "0.62", "wpb": "127.2", "bsz": "8", "num_updates": "1600", "lr": "8.42e-05", "gnorm": "2.961", "loss_scale": "128", "train_wall": "322", "gb_free": "7.1", "wall": "2590"}
-[2024-06-08 14:10:50,500][train_inner][INFO] - {"epoch": 1, "update": 0.142, "loss": "5.747", "ntokens": "128.27", "acc_total": "128.27", "n_correct": "32.4", "wer_total": "128.27", "n_error": "95.55", "ppl": "53.72", "accuracy": "25.259", "wer": "74.491", "wps": "79.5", "ups": "0.62", "wpb": "128.3", "bsz": "8", "num_updates": "1800", "lr": "9.41e-05", "gnorm": "2.807", "loss_scale": "128", "train_wall": "322", "gb_free": "7.1", "wall": "2913"}
-[2024-06-08 14:16:13,202][train_inner][INFO] - {"epoch": 1, "update": 0.158, "loss": "5.728", "ntokens": "127.54", "acc_total": "127.54", "n_correct": "32.62", "wer_total": "127.54", "n_error": "94.66", "ppl": "53", "accuracy": "25.576", "wer": "74.22", "wps": "79", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "2000", "lr": "0.000104", "gnorm": "2.662", "loss_scale": "128", "train_wall": "322", "gb_free": "7.1", "wall": "3235"}
-[2024-06-08 14:21:36,313][train_inner][INFO] - {"epoch": 1, "update": 0.174, "loss": "5.693", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "33.495", "wer_total": "127.94", "n_error": "94.265", "ppl": "51.74", "accuracy": "26.18", "wer": "73.679", "wps": "79.2", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "2200", "lr": "0.0001139", "gnorm": "2.54", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "3558"}
-[2024-06-08 14:26:59,200][train_inner][INFO] - {"epoch": 1, "update": 0.19, "loss": "5.643", "ntokens": "129.415", "acc_total": "129.415", "n_correct": "35.02", "wer_total": "129.415", "n_error": "94.165", "ppl": "49.96", "accuracy": "27.06", "wer": "72.762", "wps": "80.2", "ups": "0.62", "wpb": "129.4", "bsz": "8", "num_updates": "2400", "lr": "0.0001238", "gnorm": "2.531", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "3881"}
-[2024-06-08 14:29:40,612][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 14:51:00,458][valid][INFO] - {"epoch": 1, "valid_loss": "5.471", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "4.51544", "valid_wer_total": "15.5723", "valid_n_error": "11.0269", "valid_ppl": "44.34", "valid_accuracy": "28.997", "valid_wer": "70.811", "valid_wps": "154.1", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "2500"}
-[2024-06-08 14:51:00,459][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 2500 updates
-[2024-06-08 14:51:00,459][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_2500.pt
-[2024-06-08 14:51:03,603][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_2500.pt
-[2024-06-08 14:51:06,447][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_2500.pt (epoch 1 @ 2500 updates, score 28.997) (writing took 5.988429816963617 seconds)
-[2024-06-08 14:53:47,461][train_inner][INFO] - {"epoch": 1, "update": 0.206, "loss": "5.498", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "36.73", "wer_total": "127.155", "n_error": "90.2", "ppl": "45.19", "accuracy": "28.886", "wer": "70.937", "wps": "15.8", "ups": "0.12", "wpb": "127.2", "bsz": "8", "num_updates": "2600", "lr": "0.0001337", "gnorm": "2.696", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "5489"}
-[2024-06-08 14:59:10,185][train_inner][INFO] - {"epoch": 1, "update": 0.221, "loss": "5.404", "ntokens": "129.095", "acc_total": "129.095", "n_correct": "38.935", "wer_total": "129.095", "n_error": "89.875", "ppl": "42.34", "accuracy": "30.16", "wer": "69.619", "wps": "80", "ups": "0.62", "wpb": "129.1", "bsz": "8", "num_updates": "2800", "lr": "0.0001436", "gnorm": "2.829", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "5812"}
-[2024-06-08 15:04:32,825][train_inner][INFO] - {"epoch": 1, "update": 0.237, "loss": "5.349", "ntokens": "129.18", "acc_total": "129.18", "n_correct": "40.565", "wer_total": "129.18", "n_error": "88.33", "ppl": "40.75", "accuracy": "31.402", "wer": "68.377", "wps": "80.1", "ups": "0.62", "wpb": "129.2", "bsz": "8", "num_updates": "3000", "lr": "0.0001535", "gnorm": "3.036", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "6135"}
-[2024-06-08 15:09:55,619][train_inner][INFO] - {"epoch": 1, "update": 0.253, "loss": "5.097", "ntokens": "128.19", "acc_total": "128.19", "n_correct": "43.795", "wer_total": "128.19", "n_error": "84.11", "ppl": "34.24", "accuracy": "34.164", "wer": "65.614", "wps": "79.4", "ups": "0.62", "wpb": "128.2", "bsz": "8", "num_updates": "3200", "lr": "0.0001634", "gnorm": "3.388", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "6458"}
-[2024-06-08 15:15:18,520][train_inner][INFO] - {"epoch": 1, "update": 0.269, "loss": "4.847", "ntokens": "127.955", "acc_total": "127.955", "n_correct": "47.56", "wer_total": "127.955", "n_error": "80.14", "ppl": "28.77", "accuracy": "37.169", "wer": "62.631", "wps": "79.3", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "3400", "lr": "0.0001733", "gnorm": "3.596", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "6781"}
-[2024-06-08 15:20:41,418][train_inner][INFO] - {"epoch": 1, "update": 0.285, "loss": "4.7", "ntokens": "128.005", "acc_total": "128.005", "n_correct": "49.38", "wer_total": "128.005", "n_error": "78.385", "ppl": "25.98", "accuracy": "38.577", "wer": "61.236", "wps": "79.3", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "3600", "lr": "0.0001832", "gnorm": "3.766", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "7103"}
-[2024-06-08 15:26:04,008][train_inner][INFO] - {"epoch": 1, "update": 0.3, "loss": "4.546", "ntokens": "127.875", "acc_total": "127.875", "n_correct": "51.745", "wer_total": "127.875", "n_error": "75.945", "ppl": "23.36", "accuracy": "40.465", "wer": "59.39", "wps": "79.3", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "3800", "lr": "0.0001931", "gnorm": "3.797", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "7426"}
-[2024-06-08 15:31:26,883][train_inner][INFO] - {"epoch": 1, "update": 0.316, "loss": "4.344", "ntokens": "128.9", "acc_total": "128.9", "n_correct": "54.555", "wer_total": "128.9", "n_error": "74.145", "ppl": "20.31", "accuracy": "42.324", "wer": "57.521", "wps": "79.8", "ups": "0.62", "wpb": "128.9", "bsz": "8", "num_updates": "4000", "lr": "0.000203", "gnorm": "3.977", "loss_scale": "256", "train_wall": "322", "gb_free": "7.1", "wall": "7749"}
-[2024-06-08 15:36:49,415][train_inner][INFO] - {"epoch": 1, "update": 0.332, "loss": "4.205", "ntokens": "127.815", "acc_total": "127.815", "n_correct": "56.145", "wer_total": "127.815", "n_error": "71.48", "ppl": "18.45", "accuracy": "43.927", "wer": "55.925", "wps": "79.3", "ups": "0.62", "wpb": "127.8", "bsz": "8", "num_updates": "4200", "lr": "0.0002129", "gnorm": "4.009", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "8071"}
-[2024-06-08 15:42:12,275][train_inner][INFO] - {"epoch": 1, "update": 0.348, "loss": "4.099", "ntokens": "128.615", "acc_total": "128.615", "n_correct": "57.855", "wer_total": "128.615", "n_error": "70.56", "ppl": "17.14", "accuracy": "44.983", "wer": "54.861", "wps": "79.7", "ups": "0.62", "wpb": "128.6", "bsz": "8", "num_updates": "4400", "lr": "0.0002228", "gnorm": "3.958", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "8394"}
-[2024-06-08 15:47:35,080][train_inner][INFO] - {"epoch": 1, "update": 0.364, "loss": "3.907", "ntokens": "128.235", "acc_total": "128.235", "n_correct": "59.7", "wer_total": "128.235", "n_error": "68.32", "ppl": "15", "accuracy": "46.555", "wer": "53.277", "wps": "79.5", "ups": "0.62", "wpb": "128.2", "bsz": "8", "num_updates": "4600", "lr": "0.0002327", "gnorm": "3.965", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "8717"}
-[2024-06-08 15:52:57,894][train_inner][INFO] - {"epoch": 1, "update": 0.38, "loss": "3.879", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "59.795", "wer_total": "127.94", "n_error": "67.99", "ppl": "14.71", "accuracy": "46.737", "wer": "53.142", "wps": "79.3", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "4800", "lr": "0.0002426", "gnorm": "4.038", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "9040"}
-[2024-06-08 15:58:20,612][train_inner][INFO] - {"epoch": 1, "update": 0.395, "loss": "3.708", "ntokens": "129.36", "acc_total": "129.36", "n_correct": "62.245", "wer_total": "129.36", "n_error": "66.93", "ppl": "13.07", "accuracy": "48.118", "wer": "51.739", "wps": "80.2", "ups": "0.62", "wpb": "129.4", "bsz": "8", "num_updates": "5000", "lr": "0.0002525", "gnorm": "4.039", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "9363"}
-[2024-06-08 15:58:20,612][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 16:19:41,313][valid][INFO] - {"epoch": 1, "valid_loss": "3.638", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "7.61849", "valid_wer_total": "15.5723", "valid_n_error": "7.93635", "valid_ppl": "12.45", "valid_accuracy": "48.923", "valid_wer": "50.965", "valid_wps": "154", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "5000", "valid_best_accuracy": "48.923"}
-[2024-06-08 16:19:41,314][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 5000 updates
-[2024-06-08 16:19:41,314][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_5000.pt
-[2024-06-08 16:19:44,478][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_5000.pt
-[2024-06-08 16:19:48,736][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_5000.pt (epoch 1 @ 5000 updates, score 48.923) (writing took 7.421530130028259 seconds)
-[2024-06-08 16:25:11,245][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "3.672", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "62.605", "wer_total": "127.41", "n_error": "64.675", "ppl": "12.74", "accuracy": "49.137", "wer": "50.761", "wps": "15.8", "ups": "0.12", "wpb": "127.4", "bsz": "8", "num_updates": "5200", "lr": "0.0002624", "gnorm": "4.017", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "10973"}
-[2024-06-08 16:30:34,107][train_inner][INFO] - {"epoch": 1, "update": 0.427, "loss": "3.534", "ntokens": "127.99", "acc_total": "127.99", "n_correct": "64.215", "wer_total": "127.99", "n_error": "63.615", "ppl": "11.58", "accuracy": "50.172", "wer": "49.703", "wps": "79.3", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "5400", "lr": "0.0002723", "gnorm": "3.964", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "11296"}
-[2024-06-08 16:35:56,865][train_inner][INFO] - {"epoch": 1, "update": 0.443, "loss": "3.429", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "64.865", "wer_total": "126.81", "n_error": "61.77", "ppl": "10.77", "accuracy": "51.151", "wer": "48.711", "wps": "78.6", "ups": "0.62", "wpb": "126.8", "bsz": "8", "num_updates": "5600", "lr": "0.0002822", "gnorm": "3.959", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "11619"}
-[2024-06-08 16:41:19,654][train_inner][INFO] - {"epoch": 1, "update": 0.459, "loss": "3.393", "ntokens": "128.39", "acc_total": "128.39", "n_correct": "66.64", "wer_total": "128.39", "n_error": "61.65", "ppl": "10.51", "accuracy": "51.904", "wer": "48.018", "wps": "79.6", "ups": "0.62", "wpb": "128.4", "bsz": "8", "num_updates": "5800", "lr": "0.0002921", "gnorm": "4.012", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "11942"}
-[2024-06-08 16:46:42,313][train_inner][INFO] - {"epoch": 1, "update": 0.474, "loss": "3.314", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "66.18", "wer_total": "127.425", "n_error": "61.075", "ppl": "9.95", "accuracy": "51.936", "wer": "47.93", "wps": "79", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "6000", "lr": "0.000302", "gnorm": "3.946", "loss_scale": "512", "train_wall": "322", "gb_free": "7.1", "wall": "12264"}
-[2024-06-08 16:52:05,096][train_inner][INFO] - {"epoch": 1, "update": 0.49, "loss": "3.194", "ntokens": "127.935", "acc_total": "127.935", "n_correct": "67.88", "wer_total": "127.935", "n_error": "59.925", "ppl": "9.15", "accuracy": "53.058", "wer": "46.84", "wps": "79.3", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "6200", "lr": "0.0003119", "gnorm": "3.932", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "12587"}
-[2024-06-08 16:57:27,797][train_inner][INFO] - {"epoch": 1, "update": 0.506, "loss": "3.143", "ntokens": "128.395", "acc_total": "128.395", "n_correct": "69.22", "wer_total": "128.395", "n_error": "59.055", "ppl": "8.83", "accuracy": "53.912", "wer": "45.995", "wps": "79.6", "ups": "0.62", "wpb": "128.4", "bsz": "8", "num_updates": "6400", "lr": "0.0003218", "gnorm": "3.938", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "12910"}
-[2024-06-08 17:02:50,352][train_inner][INFO] - {"epoch": 1, "update": 0.522, "loss": "3.135", "ntokens": "127.92", "acc_total": "127.92", "n_correct": "69.49", "wer_total": "127.92", "n_error": "58.325", "ppl": "8.79", "accuracy": "54.323", "wer": "45.595", "wps": "79.3", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "6600", "lr": "0.0003317", "gnorm": "3.884", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "13232"}
-[2024-06-08 17:08:12,785][train_inner][INFO] - {"epoch": 1, "update": 0.538, "loss": "3.076", "ntokens": "128.695", "acc_total": "128.695", "n_correct": "70.61", "wer_total": "128.695", "n_error": "57.99", "ppl": "8.44", "accuracy": "54.866", "wer": "45.06", "wps": "79.8", "ups": "0.62", "wpb": "128.7", "bsz": "8", "num_updates": "6800", "lr": "0.0003416", "gnorm": "3.871", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "13555"}
-[2024-06-08 17:13:35,174][train_inner][INFO] - {"epoch": 1, "update": 0.553, "loss": "2.977", "ntokens": "128.315", "acc_total": "128.315", "n_correct": "71.53", "wer_total": "128.315", "n_error": "56.635", "ppl": "7.87", "accuracy": "55.746", "wer": "44.137", "wps": "79.6", "ups": "0.62", "wpb": "128.3", "bsz": "8", "num_updates": "7000", "lr": "0.0003515", "gnorm": "3.828", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "13877"}
-[2024-06-08 17:18:57,578][train_inner][INFO] - {"epoch": 1, "update": 0.569, "loss": "2.94", "ntokens": "128.155", "acc_total": "128.155", "n_correct": "71.595", "wer_total": "128.155", "n_error": "56.44", "ppl": "7.68", "accuracy": "55.866", "wer": "44.04", "wps": "79.5", "ups": "0.62", "wpb": "128.2", "bsz": "8", "num_updates": "7200", "lr": "0.0003614", "gnorm": "3.904", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "14200"}
-[2024-06-08 17:24:19,730][train_inner][INFO] - {"epoch": 1, "update": 0.585, "loss": "2.945", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "71.48", "wer_total": "126.975", "n_error": "55.395", "ppl": "7.7", "accuracy": "56.295", "wer": "43.627", "wps": "78.8", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "7400", "lr": "0.0003713", "gnorm": "3.902", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "14522"}
-[2024-06-08 17:27:00,780][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 17:48:18,301][valid][INFO] - {"epoch": 1, "valid_loss": "2.907", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "8.75559", "valid_wer_total": "15.5723", "valid_n_error": "6.79144", "valid_ppl": "7.5", "valid_accuracy": "56.225", "valid_wer": "43.612", "valid_wps": "154.4", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "7500", "valid_best_accuracy": "56.225"}
-[2024-06-08 17:48:18,301][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 7500 updates
-[2024-06-08 17:48:18,302][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_7500.pt
-[2024-06-08 17:48:21,499][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_7500.pt
-[2024-06-08 17:48:26,650][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_7500.pt (epoch 1 @ 7500 updates, score 56.225) (writing took 8.348442672984675 seconds)
-[2024-06-08 17:51:07,314][train_inner][INFO] - {"epoch": 1, "update": 0.601, "loss": "2.888", "ntokens": "128.245", "acc_total": "128.245", "n_correct": "72.335", "wer_total": "128.245", "n_error": "55.815", "ppl": "7.4", "accuracy": "56.404", "wer": "43.522", "wps": "16", "ups": "0.12", "wpb": "128.2", "bsz": "8", "num_updates": "7600", "lr": "0.0003812", "gnorm": "3.914", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "16129"}
-[2024-06-08 17:56:29,549][train_inner][INFO] - {"epoch": 1, "update": 0.617, "loss": "2.847", "ntokens": "128.505", "acc_total": "128.505", "n_correct": "73.015", "wer_total": "128.505", "n_error": "55.395", "ppl": "7.19", "accuracy": "56.819", "wer": "43.107", "wps": "79.8", "ups": "0.62", "wpb": "128.5", "bsz": "8", "num_updates": "7800", "lr": "0.0003911", "gnorm": "3.839", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "16452"}
-[2024-06-08 18:01:51,768][train_inner][INFO] - {"epoch": 1, "update": 0.633, "loss": "2.905", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "72.17", "wer_total": "127.52", "n_error": "55.24", "ppl": "7.49", "accuracy": "56.595", "wer": "43.319", "wps": "79.2", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "8000", "lr": "0.000401", "gnorm": "3.971", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "16774"}
-[2024-06-08 18:07:13,983][train_inner][INFO] - {"epoch": 1, "update": 0.648, "loss": "2.806", "ntokens": "128.09", "acc_total": "128.09", "n_correct": "73.2", "wer_total": "128.09", "n_error": "54.78", "ppl": "6.99", "accuracy": "57.147", "wer": "42.767", "wps": "79.5", "ups": "0.62", "wpb": "128.1", "bsz": "8", "num_updates": "8200", "lr": "0.0004109", "gnorm": "3.954", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "17096"}
-[2024-06-08 18:12:36,267][train_inner][INFO] - {"epoch": 1, "update": 0.664, "loss": "2.82", "ntokens": "127.165", "acc_total": "127.165", "n_correct": "73.66", "wer_total": "127.165", "n_error": "53.38", "ppl": "7.06", "accuracy": "57.925", "wer": "41.977", "wps": "78.9", "ups": "0.62", "wpb": "127.2", "bsz": "8", "num_updates": "8400", "lr": "0.0004208", "gnorm": "3.858", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "17418"}
-[2024-06-08 18:17:58,606][train_inner][INFO] - {"epoch": 1, "update": 0.68, "loss": "2.687", "ntokens": "128.38", "acc_total": "128.38", "n_correct": "75.61", "wer_total": "128.38", "n_error": "52.67", "ppl": "6.44", "accuracy": "58.895", "wer": "41.027", "wps": "79.7", "ups": "0.62", "wpb": "128.4", "bsz": "8", "num_updates": "8600", "lr": "0.0004307", "gnorm": "3.921", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "17741"}
-[2024-06-08 18:23:20,876][train_inner][INFO] - {"epoch": 1, "update": 0.696, "loss": "2.735", "ntokens": "128.605", "acc_total": "128.605", "n_correct": "74.93", "wer_total": "128.605", "n_error": "53.565", "ppl": "6.66", "accuracy": "58.264", "wer": "41.651", "wps": "79.8", "ups": "0.62", "wpb": "128.6", "bsz": "8", "num_updates": "8800", "lr": "0.0004406", "gnorm": "3.909", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "18063"}
-[2024-06-08 18:28:43,301][train_inner][INFO] - {"epoch": 1, "update": 0.712, "loss": "2.661", "ntokens": "128.05", "acc_total": "128.05", "n_correct": "75.205", "wer_total": "128.05", "n_error": "52.76", "ppl": "6.32", "accuracy": "58.731", "wer": "41.203", "wps": "79.4", "ups": "0.62", "wpb": "128.1", "bsz": "8", "num_updates": "9000", "lr": "0.0004505", "gnorm": "3.841", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "18385"}
-[2024-06-08 18:34:05,423][train_inner][INFO] - {"epoch": 1, "update": 0.727, "loss": "2.642", "ntokens": "128.705", "acc_total": "128.705", "n_correct": "75.825", "wer_total": "128.705", "n_error": "52.785", "ppl": "6.24", "accuracy": "58.914", "wer": "41.012", "wps": "79.9", "ups": "0.62", "wpb": "128.7", "bsz": "8", "num_updates": "9200", "lr": "0.0004604", "gnorm": "3.866", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "18707"}
-[2024-06-08 18:39:27,843][train_inner][INFO] - {"epoch": 1, "update": 0.743, "loss": "2.672", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "75.655", "wer_total": "126.76", "n_error": "51.025", "ppl": "6.37", "accuracy": "59.684", "wer": "40.253", "wps": "78.6", "ups": "0.62", "wpb": "126.8", "bsz": "8", "num_updates": "9400", "lr": "0.0004703", "gnorm": "3.951", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "19030"}
-[2024-06-08 18:44:49,958][train_inner][INFO] - {"epoch": 1, "update": 0.759, "loss": "2.652", "ntokens": "127.53", "acc_total": "127.53", "n_correct": "75.565", "wer_total": "127.53", "n_error": "51.89", "ppl": "6.29", "accuracy": "59.253", "wer": "40.688", "wps": "79.2", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "9600", "lr": "0.0004802", "gnorm": "3.942", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "19352"}
-[2024-06-08 18:50:12,320][train_inner][INFO] - {"epoch": 1, "update": 0.775, "loss": "2.635", "ntokens": "128.81", "acc_total": "128.81", "n_correct": "77.015", "wer_total": "128.81", "n_error": "51.7", "ppl": "6.21", "accuracy": "59.79", "wer": "40.137", "wps": "79.9", "ups": "0.62", "wpb": "128.8", "bsz": "8", "num_updates": "9800", "lr": "0.0004901", "gnorm": "3.946", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "19674"}
-[2024-06-08 18:55:34,493][train_inner][INFO] - {"epoch": 1, "update": 0.791, "loss": "2.652", "ntokens": "128.93", "acc_total": "128.93", "n_correct": "76.055", "wer_total": "128.93", "n_error": "52.81", "ppl": "6.29", "accuracy": "58.989", "wer": "40.96", "wps": "80", "ups": "0.62", "wpb": "128.9", "bsz": "8", "num_updates": "10000", "lr": "0.0005", "gnorm": "4.075", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "19997"}
-[2024-06-08 18:55:34,493][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 19:16:51,259][valid][INFO] - {"epoch": 1, "valid_loss": "2.582", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "9.29519", "valid_wer_total": "15.5723", "valid_n_error": "6.26423", "valid_ppl": "5.99", "valid_accuracy": "59.691", "valid_wer": "40.227", "valid_wps": "154.5", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "10000", "valid_best_accuracy": "59.691"}
-[2024-06-08 19:16:51,260][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 10000 updates
-[2024-06-08 19:16:51,260][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_10000.pt
-[2024-06-08 19:16:54,439][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_10000.pt
-[2024-06-08 19:17:05,913][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_10000.pt (epoch 1 @ 10000 updates, score 59.691) (writing took 14.652903156995308 seconds)
-[2024-06-08 19:22:28,064][train_inner][INFO] - {"epoch": 1, "update": 0.806, "loss": "2.605", "ntokens": "128.81", "acc_total": "128.81", "n_correct": "77.185", "wer_total": "128.81", "n_error": "51.565", "ppl": "6.08", "accuracy": "59.922", "wer": "40.032", "wps": "16", "ups": "0.12", "wpb": "128.8", "bsz": "8", "num_updates": "10200", "lr": "0.000485243", "gnorm": "4.021", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "21610"}
-[2024-06-08 19:24:27,285][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 19:27:51,875][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "2.56", "ntokens": "127.93", "acc_total": "127.93", "n_correct": "79.055", "wer_total": "127.93", "n_error": "48.845", "ppl": "5.9", "accuracy": "61.796", "wer": "38.181", "wps": "79", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "10400", "lr": "0.000470922", "gnorm": "3.839", "loss_scale": "2048", "train_wall": "323", "gb_free": "7.1", "wall": "21934"}
-[2024-06-08 19:33:13,931][train_inner][INFO] - {"epoch": 1, "update": 0.838, "loss": "2.523", "ntokens": "127.445", "acc_total": "127.445", "n_correct": "77.46", "wer_total": "127.445", "n_error": "49.86", "ppl": "5.75", "accuracy": "60.779", "wer": "39.123", "wps": "79.1", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "10600", "lr": "0.000457024", "gnorm": "4.025", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "22256"}
-[2024-06-08 19:38:36,286][train_inner][INFO] - {"epoch": 1, "update": 0.854, "loss": "2.398", "ntokens": "128.18", "acc_total": "128.18", "n_correct": "78.985", "wer_total": "128.18", "n_error": "49.095", "ppl": "5.27", "accuracy": "61.62", "wer": "38.302", "wps": "79.5", "ups": "0.62", "wpb": "128.2", "bsz": "8", "num_updates": "10800", "lr": "0.000443536", "gnorm": "3.987", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "22578"}
-[2024-06-08 19:43:58,358][train_inner][INFO] - {"epoch": 1, "update": 0.87, "loss": "2.419", "ntokens": "127.95", "acc_total": "127.95", "n_correct": "79.135", "wer_total": "127.95", "n_error": "48.755", "ppl": "5.35", "accuracy": "61.848", "wer": "38.105", "wps": "79.5", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "11000", "lr": "0.000430446", "gnorm": "3.811", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "22900"}
-[2024-06-08 19:49:20,533][train_inner][INFO] - {"epoch": 1, "update": 0.886, "loss": "2.457", "ntokens": "127.915", "acc_total": "127.915", "n_correct": "78.81", "wer_total": "127.915", "n_error": "49", "ppl": "5.49", "accuracy": "61.611", "wer": "38.307", "wps": "79.4", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "11200", "lr": "0.000417742", "gnorm": "3.81", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "23223"}
-[2024-06-08 19:54:42,648][train_inner][INFO] - {"epoch": 1, "update": 0.901, "loss": "2.417", "ntokens": "127.985", "acc_total": "127.985", "n_correct": "79.705", "wer_total": "127.985", "n_error": "48.23", "ppl": "5.34", "accuracy": "62.277", "wer": "37.684", "wps": "79.5", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "11400", "lr": "0.000405413", "gnorm": "3.956", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "23545"}
-[2024-06-08 20:00:04,821][train_inner][INFO] - {"epoch": 1, "update": 0.917, "loss": "2.382", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "79.1", "wer_total": "126.795", "n_error": "47.64", "ppl": "5.21", "accuracy": "62.384", "wer": "37.572", "wps": "78.7", "ups": "0.62", "wpb": "126.8", "bsz": "8", "num_updates": "11600", "lr": "0.000393448", "gnorm": "3.967", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "23867"}
-[2024-06-08 20:05:27,041][train_inner][INFO] - {"epoch": 1, "update": 0.933, "loss": "2.338", "ntokens": "127.205", "acc_total": "127.205", "n_correct": "79.61", "wer_total": "127.205", "n_error": "47.535", "ppl": "5.06", "accuracy": "62.584", "wer": "37.369", "wps": "79", "ups": "0.62", "wpb": "127.2", "bsz": "8", "num_updates": "11800", "lr": "0.000381836", "gnorm": "3.826", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "24189"}
-[2024-06-08 20:10:49,170][train_inner][INFO] - {"epoch": 1, "update": 0.949, "loss": "2.268", "ntokens": "128.37", "acc_total": "128.37", "n_correct": "81.66", "wer_total": "128.37", "n_error": "46.65", "ppl": "4.82", "accuracy": "63.613", "wer": "36.34", "wps": "79.7", "ups": "0.62", "wpb": "128.4", "bsz": "8", "num_updates": "12000", "lr": "0.000370567", "gnorm": "3.685", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "24511"}
-[2024-06-08 20:16:11,190][train_inner][INFO] - {"epoch": 1, "update": 0.965, "loss": "2.337", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "80.45", "wer_total": "127.515", "n_error": "46.99", "ppl": "5.05", "accuracy": "63.091", "wer": "36.851", "wps": "79.2", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "12200", "lr": "0.000359631", "gnorm": "4.035", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "24833"}
-[2024-06-08 20:21:33,336][train_inner][INFO] - {"epoch": 1, "update": 0.98, "loss": "2.245", "ntokens": "130.215", "acc_total": "130.215", "n_correct": "85.24", "wer_total": "130.215", "n_error": "44.92", "ppl": "4.74", "accuracy": "65.461", "wer": "34.497", "wps": "80.8", "ups": "0.62", "wpb": "130.2", "bsz": "8", "num_updates": "12400", "lr": "0.000349017", "gnorm": "3.662", "loss_scale": "4096", "train_wall": "321", "gb_free": "7.1", "wall": "25155"}
-[2024-06-08 20:22:00,667][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 20:24:15,974][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 20:45:33,062][valid][INFO] - {"epoch": 1, "valid_loss": "2.163", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "10.1102", "valid_wer_total": "15.5723", "valid_n_error": "5.4555", "valid_ppl": "4.48", "valid_accuracy": "64.924", "valid_wer": "35.033", "valid_wps": "154.4", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "12500", "valid_best_accuracy": "64.924"}
-[2024-06-08 20:45:33,063][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12500 updates
-[2024-06-08 20:45:33,063][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12500.pt
-[2024-06-08 20:45:36,244][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12500.pt
-[2024-06-08 20:45:40,481][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12500.pt (epoch 1 @ 12500 updates, score 64.924) (writing took 7.41779721097555 seconds)
-[2024-06-08 20:48:21,126][train_inner][INFO] - {"epoch": 1, "update": 0.996, "loss": "2.238", "ntokens": "127.435", "acc_total": "127.435", "n_correct": "84.635", "wer_total": "127.435", "n_error": "42.745", "ppl": "4.72", "accuracy": "66.414", "wer": "33.543", "wps": "15.9", "ups": "0.12", "wpb": "127.4", "bsz": "8", "num_updates": "12600", "lr": "0.000338716", "gnorm": "3.753", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "26763"}
-[2024-06-08 20:49:34,478][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 21:10:51,412][valid][INFO] - {"epoch": 1, "valid_loss": "2.14", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "10.5792", "valid_wer_total": "15.5723", "valid_n_error": "4.98744", "valid_ppl": "4.41", "valid_accuracy": "67.936", "valid_wer": "32.028", "valid_wps": "154.4", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "12646", "valid_best_accuracy": "67.936"}
-[2024-06-08 21:10:51,412][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12646 updates
-[2024-06-08 21:10:51,413][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_best.pt
-[2024-06-08 21:10:55,367][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_best.pt
-[2024-06-08 21:10:57,755][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 1 @ 12646 updates, score 67.936) (writing took 6.342292756948154 seconds)
-[2024-06-08 21:10:57,755][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below)
-[2024-06-08 21:10:57,761][train][INFO] - {"epoch": 1, "train_loss": "3.759", "train_ntokens": "128.044", "train_acc_total": "128.044", "train_n_correct": "61.1846", "train_wer_total": "128.044", "train_n_error": "66.7107", "train_ppl": "13.54", "train_accuracy": "47.784", "train_wer": "52.1", "train_wps": "57.6", "train_ups": "0.45", "train_wpb": "128", "train_bsz": "8", "train_num_updates": "12646", "train_lr": "0.000336391", "train_gnorm": "3.805", "train_loss_scale": "2048", "train_train_wall": "20349", "train_gb_free": "7.1", "train_wall": "28120"}
-[2024-06-08 21:10:57,812][fairseq.trainer][INFO] - begin training epoch 2
-[2024-06-08 21:10:57,813][fairseq_cli.train][INFO] - Start iterating over samples
-[2024-06-08 21:15:05,431][train_inner][INFO] - {"epoch": 2, "update": 1.012, "loss": "2.064", "ntokens": "128.01", "acc_total": "128.01", "n_correct": "87.865", "wer_total": "128.01", "n_error": "40.085", "ppl": "4.18", "accuracy": "68.639", "wer": "31.314", "wps": "16", "ups": "0.12", "wpb": "128", "bsz": "8", "num_updates": "12800", "lr": "0.00032872", "gnorm": "3.633", "loss_scale": "2048", "train_wall": "320", "gb_free": "7.1", "wall": "28367"}
-[2024-06-08 21:20:27,481][train_inner][INFO] - {"epoch": 2, "update": 1.028, "loss": "2.033", "ntokens": "128.055", "acc_total": "128.055", "n_correct": "89.085", "wer_total": "128.055", "n_error": "38.92", "ppl": "4.09", "accuracy": "69.568", "wer": "30.393", "wps": "79.5", "ups": "0.62", "wpb": "128.1", "bsz": "8", "num_updates": "13000", "lr": "0.000319018", "gnorm": "3.542", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "28690"}
-[2024-06-08 21:25:49,530][train_inner][INFO] - {"epoch": 2, "update": 1.044, "loss": "2.05", "ntokens": "128.53", "acc_total": "128.53", "n_correct": "85.335", "wer_total": "128.53", "n_error": "43.125", "ppl": "4.14", "accuracy": "66.393", "wer": "33.552", "wps": "79.8", "ups": "0.62", "wpb": "128.5", "bsz": "8", "num_updates": "13200", "lr": "0.000309603", "gnorm": "3.642", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "29012"}
-[2024-06-08 21:31:11,446][train_inner][INFO] - {"epoch": 2, "update": 1.06, "loss": "2.033", "ntokens": "128.945", "acc_total": "128.945", "n_correct": "86.145", "wer_total": "128.945", "n_error": "42.765", "ppl": "4.09", "accuracy": "66.808", "wer": "33.165", "wps": "80.1", "ups": "0.62", "wpb": "128.9", "bsz": "8", "num_updates": "13400", "lr": "0.000300466", "gnorm": "3.478", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "29333"}
-[2024-06-08 21:36:33,638][train_inner][INFO] - {"epoch": 2, "update": 1.075, "loss": "1.973", "ntokens": "128.04", "acc_total": "128.04", "n_correct": "86.67", "wer_total": "128.04", "n_error": "41.3", "ppl": "3.93", "accuracy": "67.69", "wer": "32.256", "wps": "79.5", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "13600", "lr": "0.000291598", "gnorm": "3.487", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "29656"}
-[2024-06-08 21:41:56,017][train_inner][INFO] - {"epoch": 2, "update": 1.091, "loss": "1.991", "ntokens": "128.705", "acc_total": "128.705", "n_correct": "86.48", "wer_total": "128.705", "n_error": "42.185", "ppl": "3.97", "accuracy": "67.192", "wer": "32.777", "wps": "79.8", "ups": "0.62", "wpb": "128.7", "bsz": "8", "num_updates": "13800", "lr": "0.000282992", "gnorm": "3.608", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "29978"}
-[2024-06-08 21:47:18,266][train_inner][INFO] - {"epoch": 2, "update": 1.107, "loss": "1.994", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "86.36", "wer_total": "126.77", "n_error": "40.36", "ppl": "3.98", "accuracy": "68.123", "wer": "31.837", "wps": "78.7", "ups": "0.62", "wpb": "126.8", "bsz": "8", "num_updates": "14000", "lr": "0.00027464", "gnorm": "3.501", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "30300"}
-[2024-06-08 21:52:40,561][train_inner][INFO] - {"epoch": 2, "update": 1.123, "loss": "1.985", "ntokens": "128.03", "acc_total": "128.03", "n_correct": "88.06", "wer_total": "128.03", "n_error": "39.92", "ppl": "3.96", "accuracy": "68.781", "wer": "31.18", "wps": "79.4", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "14200", "lr": "0.000266535", "gnorm": "3.484", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "30623"}
-[2024-06-08 21:58:02,940][train_inner][INFO] - {"epoch": 2, "update": 1.139, "loss": "1.954", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "87.005", "wer_total": "127.94", "n_error": "40.89", "ppl": "3.88", "accuracy": "68.005", "wer": "31.96", "wps": "79.4", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "14400", "lr": "0.000258668", "gnorm": "3.47", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "30945"}
-[2024-06-08 22:03:25,393][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "1.925", "ntokens": "127.65", "acc_total": "127.65", "n_correct": "87.05", "wer_total": "127.65", "n_error": "40.57", "ppl": "3.8", "accuracy": "68.194", "wer": "31.782", "wps": "79.2", "ups": "0.62", "wpb": "127.7", "bsz": "8", "num_updates": "14600", "lr": "0.000251034", "gnorm": "3.452", "loss_scale": "4096", "train_wall": "322", "gb_free": "7.1", "wall": "31267"}
-[2024-06-08 22:08:47,927][train_inner][INFO] - {"epoch": 2, "update": 1.17, "loss": "1.971", "ntokens": "128.675", "acc_total": "128.675", "n_correct": "92.42", "wer_total": "128.675", "n_error": "36.245", "ppl": "3.92", "accuracy": "71.824", "wer": "28.168", "wps": "79.8", "ups": "0.62", "wpb": "128.7", "bsz": "8", "num_updates": "14800", "lr": "0.000243626", "gnorm": "3.312", "loss_scale": "4096", "train_wall": "322", "gb_free": "7.1", "wall": "31590"}
-[2024-06-08 22:09:20,069][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 22:14:11,851][train_inner][INFO] - {"epoch": 2, "update": 1.186, "loss": "1.843", "ntokens": "128.54", "acc_total": "128.54", "n_correct": "92.845", "wer_total": "128.54", "n_error": "35.69", "ppl": "3.59", "accuracy": "72.23", "wer": "27.766", "wps": "79.4", "ups": "0.62", "wpb": "128.5", "bsz": "8", "num_updates": "15000", "lr": "0.000236435", "gnorm": "3.437", "loss_scale": "2048", "train_wall": "323", "gb_free": "7.1", "wall": "31914"}
-[2024-06-08 22:14:11,852][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 22:35:28,938][valid][INFO] - {"epoch": 2, "valid_loss": "1.968", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "11.0025", "valid_wer_total": "15.5723", "valid_n_error": "4.56401", "valid_ppl": "3.91", "valid_accuracy": "70.654", "valid_wer": "29.308", "valid_wps": "154.4", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "70.654"}
-[2024-06-08 22:35:28,939][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 15000 updates
-[2024-06-08 22:35:28,939][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_15000.pt
-[2024-06-08 22:35:32,052][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_15000.pt
-[2024-06-08 22:35:36,475][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_15000.pt (epoch 2 @ 15000 updates, score 70.654) (writing took 7.536663006991148 seconds)
-[2024-06-08 22:40:58,073][train_inner][INFO] - {"epoch": 2, "update": 1.202, "loss": "1.879", "ntokens": "127.16", "acc_total": "127.16", "n_correct": "91.58", "wer_total": "127.16", "n_error": "35.535", "ppl": "3.68", "accuracy": "72.02", "wer": "27.945", "wps": "15.8", "ups": "0.12", "wpb": "127.2", "bsz": "8", "num_updates": "15200", "lr": "0.000229457", "gnorm": "3.497", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "33520"}
-[2024-06-08 22:42:44,273][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
-[2024-06-08 22:46:21,609][train_inner][INFO] - {"epoch": 2, "update": 1.218, "loss": "1.811", "ntokens": "129", "acc_total": "129", "n_correct": "95.255", "wer_total": "129", "n_error": "33.725", "ppl": "3.51", "accuracy": "73.841", "wer": "26.143", "wps": "79.7", "ups": "0.62", "wpb": "129", "bsz": "8", "num_updates": "15400", "lr": "0.000222685", "gnorm": "3.33", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "33844"}
-[2024-06-08 22:51:43,494][train_inner][INFO] - {"epoch": 2, "update": 1.234, "loss": "1.907", "ntokens": "129.425", "acc_total": "129.425", "n_correct": "91.93", "wer_total": "129.425", "n_error": "37.485", "ppl": "3.75", "accuracy": "71.03", "wer": "28.963", "wps": "80.4", "ups": "0.62", "wpb": "129.4", "bsz": "8", "num_updates": "15600", "lr": "0.000216113", "gnorm": "3.334", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "34166"}
-[2024-06-08 22:57:05,463][train_inner][INFO] - {"epoch": 2, "update": 1.25, "loss": "1.865", "ntokens": "128.47", "acc_total": "128.47", "n_correct": "93.32", "wer_total": "128.47", "n_error": "35.105", "ppl": "3.64", "accuracy": "72.64", "wer": "27.325", "wps": "79.8", "ups": "0.62", "wpb": "128.5", "bsz": "8", "num_updates": "15800", "lr": "0.000209735", "gnorm": "3.316", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "34487"}
-[2024-06-08 23:02:27,384][train_inner][INFO] - {"epoch": 2, "update": 1.265, "loss": "1.867", "ntokens": "127.59", "acc_total": "127.59", "n_correct": "92.48", "wer_total": "127.59", "n_error": "35.075", "ppl": "3.65", "accuracy": "72.482", "wer": "27.49", "wps": "79.3", "ups": "0.62", "wpb": "127.6", "bsz": "8", "num_updates": "16000", "lr": "0.000203545", "gnorm": "3.334", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "34809"}
-[2024-06-08 23:07:49,261][train_inner][INFO] - {"epoch": 2, "update": 1.281, "loss": "1.879", "ntokens": "129.08", "acc_total": "129.08", "n_correct": "94.255", "wer_total": "129.08", "n_error": "34.79", "ppl": "3.68", "accuracy": "73.021", "wer": "26.952", "wps": "80.2", "ups": "0.62", "wpb": "129.1", "bsz": "8", "num_updates": "16200", "lr": "0.000197538", "gnorm": "3.377", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "35131"}
-[2024-06-08 23:13:11,304][train_inner][INFO] - {"epoch": 2, "update": 1.297, "loss": "1.848", "ntokens": "128.535", "acc_total": "128.535", "n_correct": "92.125", "wer_total": "128.535", "n_error": "36.37", "ppl": "3.6", "accuracy": "71.673", "wer": "28.296", "wps": "79.8", "ups": "0.62", "wpb": "128.5", "bsz": "8", "num_updates": "16400", "lr": "0.000191708", "gnorm": "3.401", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "35453"}
-[2024-06-08 23:18:33,226][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "1.827", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "92.175", "wer_total": "127.33", "n_error": "35.125", "ppl": "3.55", "accuracy": "72.391", "wer": "27.586", "wps": "79.1", "ups": "0.62", "wpb": "127.3", "bsz": "8", "num_updates": "16600", "lr": "0.00018605", "gnorm": "3.285", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "35775"}
-[2024-06-08 23:23:55,442][train_inner][INFO] - {"epoch": 2, "update": 1.329, "loss": "1.81", "ntokens": "128.755", "acc_total": "128.755", "n_correct": "92.31", "wer_total": "128.755", "n_error": "36.41", "ppl": "3.51", "accuracy": "71.694", "wer": "28.279", "wps": "79.9", "ups": "0.62", "wpb": "128.8", "bsz": "8", "num_updates": "16800", "lr": "0.000180559", "gnorm": "3.231", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "36097"}
-[2024-06-08 23:29:17,369][train_inner][INFO] - {"epoch": 2, "update": 1.344, "loss": "1.761", "ntokens": "127.755", "acc_total": "127.755", "n_correct": "92.265", "wer_total": "127.755", "n_error": "35.47", "ppl": "3.39", "accuracy": "72.22", "wer": "27.764", "wps": "79.4", "ups": "0.62", "wpb": "127.8", "bsz": "8", "num_updates": "17000", "lr": "0.00017523", "gnorm": "3.216", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "36419"}
-[2024-06-08 23:34:39,426][train_inner][INFO] - {"epoch": 2, "update": 1.36, "loss": "1.751", "ntokens": "127.955", "acc_total": "127.955", "n_correct": "91.37", "wer_total": "127.955", "n_error": "36.57", "ppl": "3.37", "accuracy": "71.408", "wer": "28.58", "wps": "79.5", "ups": "0.62", "wpb": "128", "bsz": "8", "num_updates": "17200", "lr": "0.000170059", "gnorm": "3.193", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "36741"}
-[2024-06-08 23:40:01,426][train_inner][INFO] - {"epoch": 2, "update": 1.376, "loss": "1.839", "ntokens": "128.75", "acc_total": "128.75", "n_correct": "93.005", "wer_total": "128.75", "n_error": "35.725", "ppl": "3.58", "accuracy": "72.237", "wer": "27.748", "wps": "80", "ups": "0.62", "wpb": "128.8", "bsz": "8", "num_updates": "17400", "lr": "0.00016504", "gnorm": "3.166", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "37063"}
-[2024-06-08 23:42:42,319][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-09 00:03:58,802][valid][INFO] - {"epoch": 2, "valid_loss": "1.8", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "11.4577", "valid_wer_total": "15.5723", "valid_n_error": "4.11198", "valid_ppl": "3.48", "valid_accuracy": "73.578", "valid_wer": "26.406", "valid_wps": "154.5", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "17500", "valid_best_accuracy": "73.578"}
-[2024-06-09 00:03:58,802][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 17500 updates
-[2024-06-09 00:03:58,802][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_17500.pt
-[2024-06-09 00:04:01,967][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_17500.pt
-[2024-06-09 00:04:06,204][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_17500.pt (epoch 2 @ 17500 updates, score 73.578) (writing took 7.401797092985362 seconds)
-[2024-06-09 00:06:46,865][train_inner][INFO] - {"epoch": 2, "update": 1.392, "loss": "1.765", "ntokens": "128.49", "acc_total": "128.49", "n_correct": "94.375", "wer_total": "128.49", "n_error": "34.085", "ppl": "3.4", "accuracy": "73.449", "wer": "26.527", "wps": "16", "ups": "0.12", "wpb": "128.5", "bsz": "8", "num_updates": "17600", "lr": "0.000160169", "gnorm": "3.209", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "38669"}
-[2024-06-09 00:12:09,158][train_inner][INFO] - {"epoch": 2, "update": 1.408, "loss": "1.789", "ntokens": "128.16", "acc_total": "128.16", "n_correct": "93.155", "wer_total": "128.16", "n_error": "34.975", "ppl": "3.46", "accuracy": "72.686", "wer": "27.29", "wps": "79.5", "ups": "0.62", "wpb": "128.2", "bsz": "8", "num_updates": "17800", "lr": "0.000155442", "gnorm": "3.159", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "38991"}
-[2024-06-09 00:17:30,988][train_inner][INFO] - {"epoch": 2, "update": 1.423, "loss": "1.738", "ntokens": "127.91", "acc_total": "127.91", "n_correct": "90.655", "wer_total": "127.91", "n_error": "37.22", "ppl": "3.33", "accuracy": "70.874", "wer": "29.099", "wps": "79.5", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "18000", "lr": "0.000150854", "gnorm": "3.097", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "39313"}
-[2024-06-09 00:19:28,454][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
-[2024-06-09 00:23:17,839][train_inner][INFO] - {"epoch": 2, "update": 1.439, "loss": "1.838", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "91.56", "wer_total": "127.1", "n_error": "35.515", "ppl": "3.57", "accuracy": "72.038", "wer": "27.943", "wps": "73.3", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "18200", "lr": "0.000146402", "gnorm": "13.197", "loss_scale": "1024", "train_wall": "346", "gb_free": "6.5", "wall": "39660"}
-[2024-06-09 00:29:03,085][train_inner][INFO] - {"epoch": 2, "update": 1.455, "loss": "1.725", "ntokens": "128.06", "acc_total": "128.06", "n_correct": "94.445", "wer_total": "128.06", "n_error": "33.58", "ppl": "3.31", "accuracy": "73.751", "wer": "26.222", "wps": "74.2", "ups": "0.58", "wpb": "128.1", "bsz": "8", "num_updates": "18400", "lr": "0.000142081", "gnorm": "11.918", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "40005"}
-[2024-06-09 00:34:48,159][train_inner][INFO] - {"epoch": 2, "update": 1.471, "loss": "1.667", "ntokens": "127.77", "acc_total": "127.77", "n_correct": "96.02", "wer_total": "127.77", "n_error": "31.725", "ppl": "3.18", "accuracy": "75.151", "wer": "24.83", "wps": "74.1", "ups": "0.58", "wpb": "127.8", "bsz": "8", "num_updates": "18600", "lr": "0.000137888", "gnorm": "11.358", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "40350"}
-[2024-06-09 00:40:33,252][train_inner][INFO] - {"epoch": 2, "update": 1.487, "loss": "1.734", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "94.105", "wer_total": "127.345", "n_error": "33.23", "ppl": "3.33", "accuracy": "73.898", "wer": "26.094", "wps": "73.8", "ups": "0.58", "wpb": "127.3", "bsz": "8", "num_updates": "18800", "lr": "0.000133819", "gnorm": "11.833", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "40695"}
-[2024-06-09 00:46:18,367][train_inner][INFO] - {"epoch": 2, "update": 1.503, "loss": "1.665", "ntokens": "127.845", "acc_total": "127.845", "n_correct": "97.185", "wer_total": "127.845", "n_error": "30.63", "ppl": "3.17", "accuracy": "76.018", "wer": "23.959", "wps": "74.1", "ups": "0.58", "wpb": "127.8", "bsz": "8", "num_updates": "19000", "lr": "0.000129869", "gnorm": "11.307", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "41040"}
-[2024-06-09 00:52:03,702][train_inner][INFO] - {"epoch": 2, "update": 1.518, "loss": "1.614", "ntokens": "127.27", "acc_total": "127.27", "n_correct": "97.48", "wer_total": "127.27", "n_error": "29.77", "ppl": "3.06", "accuracy": "76.593", "wer": "23.391", "wps": "73.7", "ups": "0.58", "wpb": "127.3", "bsz": "8", "num_updates": "19200", "lr": "0.000126036", "gnorm": "10.546", "loss_scale": "1024", "train_wall": "345", "gb_free": "6.5", "wall": "41386"}
-[2024-06-09 00:57:48,692][train_inner][INFO] - {"epoch": 2, "update": 1.534, "loss": "1.574", "ntokens": "128.9", "acc_total": "128.9", "n_correct": "98.11", "wer_total": "128.9", "n_error": "30.775", "ppl": "2.98", "accuracy": "76.113", "wer": "23.875", "wps": "74.7", "ups": "0.58", "wpb": "128.9", "bsz": "8", "num_updates": "19400", "lr": "0.000122317", "gnorm": "10.409", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "41731"}
-[2024-06-09 01:03:33,596][train_inner][INFO] - {"epoch": 2, "update": 1.55, "loss": "1.552", "ntokens": "128.29", "acc_total": "128.29", "n_correct": "99.71", "wer_total": "128.29", "n_error": "28.57", "ppl": "2.93", "accuracy": "77.722", "wer": "22.27", "wps": "74.4", "ups": "0.58", "wpb": "128.3", "bsz": "8", "num_updates": "19600", "lr": "0.000118707", "gnorm": "10.375", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "42076"}
-[2024-06-09 01:09:18,987][train_inner][INFO] - {"epoch": 2, "update": 1.566, "loss": "1.556", "ntokens": "127.45", "acc_total": "127.45", "n_correct": "98.785", "wer_total": "127.45", "n_error": "28.65", "ppl": "2.94", "accuracy": "77.509", "wer": "22.479", "wps": "73.8", "ups": "0.58", "wpb": "127.5", "bsz": "8", "num_updates": "19800", "lr": "0.000115203", "gnorm": "10.258", "loss_scale": "1024", "train_wall": "345", "gb_free": "6.5", "wall": "42421"}
-[2024-06-09 01:15:04,292][train_inner][INFO] - {"epoch": 2, "update": 1.582, "loss": "1.512", "ntokens": "128.54", "acc_total": "128.54", "n_correct": "100.25", "wer_total": "128.54", "n_error": "28.285", "ppl": "2.85", "accuracy": "77.991", "wer": "22.005", "wps": "74.5", "ups": "0.58", "wpb": "128.5", "bsz": "8", "num_updates": "20000", "lr": "0.000111803", "gnorm": "10.437", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "42766"}
-[2024-06-09 01:15:04,292][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-09 01:36:20,382][valid][INFO] - {"epoch": 2, "valid_loss": "1.552", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "12.0531", "valid_wer_total": "15.5723", "valid_n_error": "3.51662", "valid_ppl": "2.93", "valid_accuracy": "77.401", "valid_wer": "22.583", "valid_wps": "154.5", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "20000", "valid_best_accuracy": "77.401"}
-[2024-06-09 01:36:20,383][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 20000 updates
-[2024-06-09 01:36:20,383][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_20000.pt
-[2024-06-09 01:36:23,542][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_20000.pt
-[2024-06-09 01:36:28,768][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_20000.pt (epoch 2 @ 20000 updates, score 77.401) (writing took 8.385050918965135 seconds)
-[2024-06-09 01:42:13,564][train_inner][INFO] - {"epoch": 2, "update": 1.597, "loss": "1.552", "ntokens": "127.605", "acc_total": "127.605", "n_correct": "99.715", "wer_total": "127.605", "n_error": "27.89", "ppl": "2.93", "accuracy": "78.143", "wer": "21.857", "wps": "15.7", "ups": "0.12", "wpb": "127.6", "bsz": "8", "num_updates": "20200", "lr": "0.000108504", "gnorm": "9.757", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "44396"}
-[2024-06-09 01:47:58,589][train_inner][INFO] - {"epoch": 2, "update": 1.613, "loss": "1.504", "ntokens": "127.79", "acc_total": "127.79", "n_correct": "100.75", "wer_total": "127.79", "n_error": "27.03", "ppl": "2.84", "accuracy": "78.84", "wer": "21.152", "wps": "74.1", "ups": "0.58", "wpb": "127.8", "bsz": "8", "num_updates": "20400", "lr": "0.000105301", "gnorm": "9.835", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "44741"}
-[2024-06-09 01:53:43,695][train_inner][INFO] - {"epoch": 2, "update": 1.629, "loss": "1.462", "ntokens": "127.675", "acc_total": "127.675", "n_correct": "100.875", "wer_total": "127.675", "n_error": "26.785", "ppl": "2.76", "accuracy": "79.009", "wer": "20.979", "wps": "74", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "20600", "lr": "0.000102194", "gnorm": "9.779", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "45086"}
-[2024-06-09 01:59:28,882][train_inner][INFO] - {"epoch": 2, "update": 1.645, "loss": "1.448", "ntokens": "128.22", "acc_total": "128.22", "n_correct": "101.685", "wer_total": "128.22", "n_error": "26.525", "ppl": "2.73", "accuracy": "79.305", "wer": "20.687", "wps": "74.3", "ups": "0.58", "wpb": "128.2", "bsz": "8", "num_updates": "20800", "lr": "9.91776e-05", "gnorm": "10.002", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "45431"}
-[2024-06-09 02:05:14,015][train_inner][INFO] - {"epoch": 2, "update": 1.661, "loss": "1.446", "ntokens": "128.145", "acc_total": "128.145", "n_correct": "101.96", "wer_total": "128.145", "n_error": "26.18", "ppl": "2.72", "accuracy": "79.566", "wer": "20.43", "wps": "74.3", "ups": "0.58", "wpb": "128.1", "bsz": "8", "num_updates": "21000", "lr": "9.62506e-05", "gnorm": "9.808", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "45776"}
-[2024-06-09 02:10:59,215][train_inner][INFO] - {"epoch": 2, "update": 1.677, "loss": "1.473", "ntokens": "127.55", "acc_total": "127.55", "n_correct": "101.58", "wer_total": "127.55", "n_error": "25.955", "ppl": "2.78", "accuracy": "79.639", "wer": "20.349", "wps": "73.9", "ups": "0.58", "wpb": "127.5", "bsz": "8", "num_updates": "21200", "lr": "9.341e-05", "gnorm": "9.621", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "46121"}
-[2024-06-09 02:16:44,149][train_inner][INFO] - {"epoch": 2, "update": 1.692, "loss": "1.481", "ntokens": "128.52", "acc_total": "128.52", "n_correct": "101.86", "wer_total": "128.52", "n_error": "26.62", "ppl": "2.79", "accuracy": "79.256", "wer": "20.713", "wps": "74.5", "ups": "0.58", "wpb": "128.5", "bsz": "8", "num_updates": "21400", "lr": "9.06532e-05", "gnorm": "9.201", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "46466"}
-[2024-06-09 02:19:41,763][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
-[2024-06-09 02:22:30,777][train_inner][INFO] - {"epoch": 2, "update": 1.708, "loss": "1.455", "ntokens": "127.77", "acc_total": "127.77", "n_correct": "101.385", "wer_total": "127.77", "n_error": "26.37", "ppl": "2.74", "accuracy": "79.35", "wer": "20.639", "wps": "73.7", "ups": "0.58", "wpb": "127.8", "bsz": "8", "num_updates": "21600", "lr": "8.79777e-05", "gnorm": "9.332", "loss_scale": "1024", "train_wall": "346", "gb_free": "6.5", "wall": "46813"}
-[2024-06-09 02:28:15,686][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "1.325", "ntokens": "127.96", "acc_total": "127.96", "n_correct": "102.99", "wer_total": "127.96", "n_error": "24.965", "ppl": "2.51", "accuracy": "80.486", "wer": "19.51", "wps": "74.2", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "21800", "lr": "8.53812e-05", "gnorm": "9.231", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "47158"}
-[2024-06-09 02:33:10,615][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0
-[2024-06-09 02:34:02,301][train_inner][INFO] - {"epoch": 2, "update": 1.74, "loss": "1.383", "ntokens": "128.975", "acc_total": "128.975", "n_correct": "103.38", "wer_total": "128.975", "n_error": "25.585", "ppl": "2.61", "accuracy": "80.155", "wer": "19.837", "wps": "74.4", "ups": "0.58", "wpb": "129", "bsz": "8", "num_updates": "22000", "lr": "8.28614e-05", "gnorm": "9.074", "loss_scale": "512", "train_wall": "346", "gb_free": "6.5", "wall": "47504"}
-[2024-06-09 02:39:47,303][train_inner][INFO] - {"epoch": 2, "update": 1.756, "loss": "1.42", "ntokens": "127.665", "acc_total": "127.665", "n_correct": "102.175", "wer_total": "127.665", "n_error": "25.47", "ppl": "2.68", "accuracy": "80.034", "wer": "19.951", "wps": "74", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "22200", "lr": "8.04159e-05", "gnorm": "9.368", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "47849"}
-[2024-06-09 02:45:32,407][train_inner][INFO] - {"epoch": 2, "update": 1.772, "loss": "1.457", "ntokens": "127.005", "acc_total": "127.005", "n_correct": "100.485", "wer_total": "127.005", "n_error": "26.505", "ppl": "2.74", "accuracy": "79.119", "wer": "20.869", "wps": "73.6", "ups": "0.58", "wpb": "127", "bsz": "8", "num_updates": "22400", "lr": "7.80425e-05", "gnorm": "9.558", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "48194"}
-[2024-06-09 02:48:25,162][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-09 03:09:40,396][valid][INFO] - {"epoch": 2, "valid_loss": "1.38", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "12.4863", "valid_wer_total": "15.5723", "valid_n_error": "3.08316", "valid_ppl": "2.6", "valid_accuracy": "80.183", "valid_wer": "19.799", "valid_wps": "154.6", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "22500", "valid_best_accuracy": "80.183"}
-[2024-06-09 03:09:40,396][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 22500 updates
-[2024-06-09 03:09:40,397][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_22500.pt
-[2024-06-09 03:09:43,612][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_22500.pt
-[2024-06-09 03:09:50,377][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_22500.pt (epoch 2 @ 22500 updates, score 80.183) (writing took 9.980852170963772 seconds)
-[2024-06-09 03:12:42,562][train_inner][INFO] - {"epoch": 2, "update": 1.787, "loss": "1.37", "ntokens": "127.585", "acc_total": "127.585", "n_correct": "102.25", "wer_total": "127.585", "n_error": "25.325", "ppl": "2.58", "accuracy": "80.143", "wer": "19.85", "wps": "15.7", "ups": "0.12", "wpb": "127.6", "bsz": "8", "num_updates": "22600", "lr": "7.57393e-05", "gnorm": "9.243", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "49825"}
-[2024-06-09 03:18:28,147][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "1.415", "ntokens": "128.24", "acc_total": "128.24", "n_correct": "101.68", "wer_total": "128.24", "n_error": "26.55", "ppl": "2.67", "accuracy": "79.289", "wer": "20.703", "wps": "74.2", "ups": "0.58", "wpb": "128.2", "bsz": "8", "num_updates": "22800", "lr": "7.3504e-05", "gnorm": "9.49", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "50170"}
-[2024-06-09 03:24:13,514][train_inner][INFO] - {"epoch": 2, "update": 1.819, "loss": "1.379", "ntokens": "128.03", "acc_total": "128.03", "n_correct": "102.465", "wer_total": "128.03", "n_error": "25.545", "ppl": "2.6", "accuracy": "80.032", "wer": "19.952", "wps": "74.1", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "23000", "lr": "7.13346e-05", "gnorm": "9.154", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "50516"}
-[2024-06-09 03:29:58,917][train_inner][INFO] - {"epoch": 2, "update": 1.835, "loss": "1.304", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "102.925", "wer_total": "127.595", "n_error": "24.65", "ppl": "2.47", "accuracy": "80.665", "wer": "19.319", "wps": "73.9", "ups": "0.58", "wpb": "127.6", "bsz": "8", "num_updates": "23200", "lr": "6.92293e-05", "gnorm": "9.001", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "50861"}
-[2024-06-09 03:35:44,302][train_inner][INFO] - {"epoch": 2, "update": 1.851, "loss": "1.337", "ntokens": "127.635", "acc_total": "127.635", "n_correct": "103.18", "wer_total": "127.635", "n_error": "24.455", "ppl": "2.53", "accuracy": "80.84", "wer": "19.16", "wps": "73.9", "ups": "0.58", "wpb": "127.6", "bsz": "8", "num_updates": "23400", "lr": "6.71862e-05", "gnorm": "9.237", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "51206"}
-[2024-06-09 03:41:29,695][train_inner][INFO] - {"epoch": 2, "update": 1.866, "loss": "1.326", "ntokens": "128.225", "acc_total": "128.225", "n_correct": "104.13", "wer_total": "128.225", "n_error": "24.09", "ppl": "2.51", "accuracy": "81.209", "wer": "18.787", "wps": "74.2", "ups": "0.58", "wpb": "128.2", "bsz": "8", "num_updates": "23600", "lr": "6.52033e-05", "gnorm": "9.03", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "51552"}
-[2024-06-09 03:47:15,271][train_inner][INFO] - {"epoch": 2, "update": 1.882, "loss": "1.327", "ntokens": "127.665", "acc_total": "127.665", "n_correct": "103.4", "wer_total": "127.665", "n_error": "24.245", "ppl": "2.51", "accuracy": "80.993", "wer": "18.991", "wps": "73.9", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "23800", "lr": "6.3279e-05", "gnorm": "9.067", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "51897"}
-[2024-06-09 03:53:00,231][train_inner][INFO] - {"epoch": 2, "update": 1.898, "loss": "1.353", "ntokens": "126.965", "acc_total": "126.965", "n_correct": "102.72", "wer_total": "126.965", "n_error": "24.235", "ppl": "2.55", "accuracy": "80.904", "wer": "19.088", "wps": "73.6", "ups": "0.58", "wpb": "127", "bsz": "8", "num_updates": "24000", "lr": "6.14114e-05", "gnorm": "8.995", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "52242"}
-[2024-06-09 03:58:45,602][train_inner][INFO] - {"epoch": 2, "update": 1.914, "loss": "1.329", "ntokens": "128.02", "acc_total": "128.02", "n_correct": "104.135", "wer_total": "128.02", "n_error": "23.88", "ppl": "2.51", "accuracy": "81.343", "wer": "18.653", "wps": "74.1", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "24200", "lr": "5.9599e-05", "gnorm": "9.083", "loss_scale": "1024", "train_wall": "345", "gb_free": "6.5", "wall": "52588"}
-[2024-06-09 04:04:30,656][train_inner][INFO] - {"epoch": 2, "update": 1.93, "loss": "1.267", "ntokens": "128.935", "acc_total": "128.935", "n_correct": "105.425", "wer_total": "128.935", "n_error": "23.505", "ppl": "2.41", "accuracy": "81.766", "wer": "18.23", "wps": "74.7", "ups": "0.58", "wpb": "128.9", "bsz": "8", "num_updates": "24400", "lr": "5.784e-05", "gnorm": "8.82", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "52933"}
-[2024-06-09 04:10:15,984][train_inner][INFO] - {"epoch": 2, "update": 1.946, "loss": "1.292", "ntokens": "128.01", "acc_total": "128.01", "n_correct": "104.26", "wer_total": "128.01", "n_error": "23.74", "ppl": "2.45", "accuracy": "81.447", "wer": "18.545", "wps": "74.1", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "24600", "lr": "5.6133e-05", "gnorm": "8.869", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "53278"}
-[2024-06-09 04:16:00,937][train_inner][INFO] - {"epoch": 2, "update": 1.961, "loss": "1.272", "ntokens": "128.26", "acc_total": "128.26", "n_correct": "104.03", "wer_total": "128.26", "n_error": "24.225", "ppl": "2.42", "accuracy": "81.109", "wer": "18.887", "wps": "74.4", "ups": "0.58", "wpb": "128.3", "bsz": "8", "num_updates": "24800", "lr": "5.44763e-05", "gnorm": "8.976", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "53623"}
-[2024-06-09 04:21:46,173][train_inner][INFO] - {"epoch": 2, "update": 1.977, "loss": "1.282", "ntokens": "127.78", "acc_total": "127.78", "n_correct": "104.3", "wer_total": "127.78", "n_error": "23.475", "ppl": "2.43", "accuracy": "81.625", "wer": "18.371", "wps": "74", "ups": "0.58", "wpb": "127.8", "bsz": "8", "num_updates": "25000", "lr": "5.28686e-05", "gnorm": "9.141", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "53968"}
-[2024-06-09 04:21:46,174][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-09 04:43:01,525][valid][INFO] - {"epoch": 2, "valid_loss": "1.282", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "12.7182", "valid_wer_total": "15.5723", "valid_n_error": "2.85256", "valid_ppl": "2.43", "valid_accuracy": "81.672", "valid_wer": "18.318", "valid_wps": "154.6", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "25000", "valid_best_accuracy": "81.672"}
-[2024-06-09 04:43:01,526][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25000 updates
-[2024-06-09 04:43:01,526][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_25000.pt
-[2024-06-09 04:43:04,657][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_25000.pt
-[2024-06-09 04:43:08,744][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_25000.pt (epoch 2 @ 25000 updates, score 81.672) (writing took 7.218426860985346 seconds)
-[2024-06-09 04:48:53,449][train_inner][INFO] - {"epoch": 2, "update": 1.993, "loss": "1.265", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "104.285", "wer_total": "127.19", "n_error": "22.895", "ppl": "2.4", "accuracy": "81.992", "wer": "18.001", "wps": "15.6", "ups": "0.12", "wpb": "127.2", "bsz": "8", "num_updates": "25200", "lr": "5.13083e-05", "gnorm": "8.89", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "55595"}
-[2024-06-09 04:51:26,306][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-09 05:12:42,896][valid][INFO] - {"epoch": 2, "valid_loss": "1.264", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "12.7604", "valid_wer_total": "15.5723", "valid_n_error": "2.81016", "valid_ppl": "2.4", "valid_accuracy": "81.943", "valid_wer": "18.046", "valid_wps": "154.5", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "25289", "valid_best_accuracy": "81.943"}
-[2024-06-09 05:12:42,897][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25289 updates
-[2024-06-09 05:12:42,897][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_best.pt
-[2024-06-09 05:12:46,858][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_best.pt
-[2024-06-09 05:12:49,200][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 2 @ 25289 updates, score 81.943) (writing took 6.303241335961502 seconds)
-[2024-06-09 05:12:49,201][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below)
-[2024-06-09 05:12:49,202][train][INFO] - {"epoch": 2, "train_loss": "1.639", "train_ntokens": "128.044", "train_acc_total": "128.044", "train_n_correct": "96.6153", "train_wer_total": "128.044", "train_n_error": "31.4058", "train_ppl": "3.11", "train_accuracy": "75.455", "train_wer": "24.527", "train_wps": "56", "train_ups": "0.44", "train_wpb": "128", "train_bsz": "8", "train_num_updates": "25289", "train_lr": "5.06288e-05", "train_gnorm": "7.078", "train_loss_scale": "1024", "train_train_wall": "21158", "train_gb_free": "6.5", "train_wall": "57031"}
-[2024-06-09 05:12:49,247][fairseq.trainer][INFO] - begin training epoch 3
-[2024-06-09 05:12:49,247][fairseq_cli.train][INFO] - Start iterating over samples
-[2024-06-09 05:16:00,321][train_inner][INFO] - {"epoch": 3, "update": 2.009, "loss": "1.137", "ntokens": "128.715", "acc_total": "128.715", "n_correct": "106.895", "wer_total": "128.715", "n_error": "21.8", "ppl": "2.2", "accuracy": "83.048", "wer": "16.937", "wps": "15.8", "ups": "0.12", "wpb": "128.7", "bsz": "8", "num_updates": "25400", "lr": "4.9794e-05", "gnorm": "8.154", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "57222"}
-[2024-06-09 05:21:45,682][train_inner][INFO] - {"epoch": 3, "update": 2.025, "loss": "1.067", "ntokens": "127.795", "acc_total": "127.795", "n_correct": "107.74", "wer_total": "127.795", "n_error": "20.05", "ppl": "2.1", "accuracy": "84.307", "wer": "15.689", "wps": "74", "ups": "0.58", "wpb": "127.8", "bsz": "8", "num_updates": "25600", "lr": "4.83244e-05", "gnorm": "8.18", "loss_scale": "1024", "train_wall": "345", "gb_free": "6.5", "wall": "57568"}
-[2024-06-09 05:27:30,595][train_inner][INFO] - {"epoch": 3, "update": 2.04, "loss": "1.028", "ntokens": "128.005", "acc_total": "128.005", "n_correct": "107.535", "wer_total": "128.005", "n_error": "20.46", "ppl": "2.04", "accuracy": "84.008", "wer": "15.984", "wps": "74.2", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "25800", "lr": "4.68982e-05", "gnorm": "7.977", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "57913"}
-[2024-06-09 05:33:15,589][train_inner][INFO] - {"epoch": 3, "update": 2.056, "loss": "1.023", "ntokens": "127.955", "acc_total": "127.955", "n_correct": "107.625", "wer_total": "127.955", "n_error": "20.33", "ppl": "2.03", "accuracy": "84.112", "wer": "15.888", "wps": "74.2", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "26000", "lr": "4.55141e-05", "gnorm": "7.909", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "58258"}
-[2024-06-09 05:39:00,762][train_inner][INFO] - {"epoch": 3, "update": 2.072, "loss": "1.084", "ntokens": "126.94", "acc_total": "126.94", "n_correct": "105.93", "wer_total": "126.94", "n_error": "21", "ppl": "2.12", "accuracy": "83.449", "wer": "16.543", "wps": "73.6", "ups": "0.58", "wpb": "126.9", "bsz": "8", "num_updates": "26200", "lr": "4.41708e-05", "gnorm": "8.224", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "58603"}
-[2024-06-09 05:44:45,857][train_inner][INFO] - {"epoch": 3, "update": 2.088, "loss": "0.994", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "107.63", "wer_total": "127.52", "n_error": "19.89", "ppl": "1.99", "accuracy": "84.402", "wer": "15.598", "wps": "73.9", "ups": "0.58", "wpb": "127.5", "bsz": "8", "num_updates": "26400", "lr": "4.28672e-05", "gnorm": "7.933", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "58948"}
-[2024-06-09 05:45:25,409][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
-[2024-06-09 05:50:32,339][train_inner][INFO] - {"epoch": 3, "update": 2.104, "loss": "1.012", "ntokens": "128.085", "acc_total": "128.085", "n_correct": "107.325", "wer_total": "128.085", "n_error": "20.755", "ppl": "2.02", "accuracy": "83.792", "wer": "16.204", "wps": "73.9", "ups": "0.58", "wpb": "128.1", "bsz": "8", "num_updates": "26600", "lr": "4.16021e-05", "gnorm": "7.838", "loss_scale": "1024", "train_wall": "346", "gb_free": "6.5", "wall": "59294"}
-[2024-06-09 05:56:17,448][train_inner][INFO] - {"epoch": 3, "update": 2.12, "loss": "1.042", "ntokens": "127.38", "acc_total": "127.38", "n_correct": "105.76", "wer_total": "127.38", "n_error": "21.62", "ppl": "2.06", "accuracy": "83.027", "wer": "16.973", "wps": "73.8", "ups": "0.58", "wpb": "127.4", "bsz": "8", "num_updates": "26800", "lr": "4.03743e-05", "gnorm": "8.079", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "59639"}
-[2024-06-09 06:02:02,614][train_inner][INFO] - {"epoch": 3, "update": 2.135, "loss": "1.038", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "104.755", "wer_total": "126.82", "n_error": "22.06", "ppl": "2.05", "accuracy": "82.601", "wer": "17.395", "wps": "73.5", "ups": "0.58", "wpb": "126.8", "bsz": "8", "num_updates": "27000", "lr": "3.91827e-05", "gnorm": "8.175", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "59985"}
-[2024-06-09 06:07:47,809][train_inner][INFO] - {"epoch": 3, "update": 2.151, "loss": "0.978", "ntokens": "128.005", "acc_total": "128.005", "n_correct": "107.53", "wer_total": "128.005", "n_error": "20.475", "ppl": "1.97", "accuracy": "84.005", "wer": "15.995", "wps": "74.2", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "27200", "lr": "3.80263e-05", "gnorm": "7.894", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "60330"}
-[2024-06-09 06:13:33,094][train_inner][INFO] - {"epoch": 3, "update": 2.167, "loss": "1.037", "ntokens": "127.945", "acc_total": "127.945", "n_correct": "107.775", "wer_total": "127.945", "n_error": "20.16", "ppl": "2.05", "accuracy": "84.235", "wer": "15.757", "wps": "74.1", "ups": "0.58", "wpb": "127.9", "bsz": "8", "num_updates": "27400", "lr": "3.6904e-05", "gnorm": "7.87", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "60675"}
-[2024-06-09 06:16:25,767][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-09 06:37:41,902][valid][INFO] - {"epoch": 3, "valid_loss": "1.23", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "12.6905", "valid_wer_total": "15.5723", "valid_n_error": "2.88004", "valid_ppl": "2.35", "valid_accuracy": "81.494", "valid_wer": "18.495", "valid_wps": "154.5", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "27500", "valid_best_accuracy": "81.943"}
-[2024-06-09 06:37:41,903][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 27500 updates
-[2024-06-09 06:37:41,903][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_3_27500.pt
-[2024-06-09 06:37:45,013][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_3_27500.pt
-[2024-06-09 06:37:47,347][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_3_27500.pt (epoch 3 @ 27500 updates, score 81.494) (writing took 5.444471130031161 seconds)
-[2024-06-09 06:40:39,677][train_inner][INFO] - {"epoch": 3, "update": 2.183, "loss": "1.006", "ntokens": "127.265", "acc_total": "127.265", "n_correct": "107.54", "wer_total": "127.265", "n_error": "19.725", "ppl": "2.01", "accuracy": "84.501", "wer": "15.499", "wps": "15.6", "ups": "0.12", "wpb": "127.3", "bsz": "8", "num_updates": "27600", "lr": "3.58149e-05", "gnorm": "7.871", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "62302"}
-[2024-06-09 06:46:24,882][train_inner][INFO] - {"epoch": 3, "update": 2.199, "loss": "0.995", "ntokens": "127.93", "acc_total": "127.93", "n_correct": "108.25", "wer_total": "127.93", "n_error": "19.67", "ppl": "1.99", "accuracy": "84.617", "wer": "15.376", "wps": "74.1", "ups": "0.58", "wpb": "127.9", "bsz": "8", "num_updates": "27800", "lr": "3.47579e-05", "gnorm": "8.045", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "62647"}
-[2024-06-09 06:52:09,967][train_inner][INFO] - {"epoch": 3, "update": 2.214, "loss": "0.998", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "107.92", "wer_total": "127.42", "n_error": "19.495", "ppl": "2", "accuracy": "84.696", "wer": "15.3", "wps": "73.8", "ups": "0.58", "wpb": "127.4", "bsz": "8", "num_updates": "28000", "lr": "3.37321e-05", "gnorm": "8", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "62992"}
-[2024-06-09 06:57:54,984][train_inner][INFO] - {"epoch": 3, "update": 2.23, "loss": "1.024", "ntokens": "127.62", "acc_total": "127.62", "n_correct": "108.25", "wer_total": "127.62", "n_error": "19.37", "ppl": "2.03", "accuracy": "84.822", "wer": "15.178", "wps": "74", "ups": "0.58", "wpb": "127.6", "bsz": "8", "num_updates": "28200", "lr": "3.27365e-05", "gnorm": "8.162", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "63337"}
-[2024-06-09 07:00:45,747][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0
-[2024-06-09 07:03:41,750][train_inner][INFO] - {"epoch": 3, "update": 2.246, "loss": "1.025", "ntokens": "127.605", "acc_total": "127.605", "n_correct": "108.09", "wer_total": "127.605", "n_error": "19.515", "ppl": "2.04", "accuracy": "84.707", "wer": "15.293", "wps": "73.6", "ups": "0.58", "wpb": "127.6", "bsz": "8", "num_updates": "28400", "lr": "3.17704e-05", "gnorm": "7.917", "loss_scale": "512", "train_wall": "346", "gb_free": "6.5", "wall": "63684"}
-[2024-06-09 07:09:26,986][train_inner][INFO] - {"epoch": 3, "update": 2.262, "loss": "1.005", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "107.915", "wer_total": "127.595", "n_error": "19.68", "ppl": "2.01", "accuracy": "84.576", "wer": "15.424", "wps": "73.9", "ups": "0.58", "wpb": "127.6", "bsz": "8", "num_updates": "28600", "lr": "3.08327e-05", "gnorm": "8.112", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "64029"}
-[2024-06-09 07:15:12,298][train_inner][INFO] - {"epoch": 3, "update": 2.278, "loss": "0.978", "ntokens": "128.11", "acc_total": "128.11", "n_correct": "108.565", "wer_total": "128.11", "n_error": "19.535", "ppl": "1.97", "accuracy": "84.744", "wer": "15.249", "wps": "74.2", "ups": "0.58", "wpb": "128.1", "bsz": "8", "num_updates": "28800", "lr": "2.99228e-05", "gnorm": "8.027", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "64374"}
-[2024-06-09 07:20:57,380][train_inner][INFO] - {"epoch": 3, "update": 2.294, "loss": "1.032", "ntokens": "128.455", "acc_total": "128.455", "n_correct": "108.395", "wer_total": "128.455", "n_error": "20.05", "ppl": "2.05", "accuracy": "84.384", "wer": "15.609", "wps": "74.4", "ups": "0.58", "wpb": "128.5", "bsz": "8", "num_updates": "29000", "lr": "2.90397e-05", "gnorm": "8.138", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "64719"}
-[2024-06-09 07:26:42,561][train_inner][INFO] - {"epoch": 3, "update": 2.309, "loss": "0.993", "ntokens": "127.22", "acc_total": "127.22", "n_correct": "108.01", "wer_total": "127.22", "n_error": "19.2", "ppl": "1.99", "accuracy": "84.9", "wer": "15.092", "wps": "73.7", "ups": "0.58", "wpb": "127.2", "bsz": "8", "num_updates": "29200", "lr": "2.81826e-05", "gnorm": "7.623", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "65065"}
-[2024-06-09 07:32:27,962][train_inner][INFO] - {"epoch": 3, "update": 2.325, "loss": "1.022", "ntokens": "128.01", "acc_total": "128.01", "n_correct": "108.035", "wer_total": "128.01", "n_error": "19.975", "ppl": "2.03", "accuracy": "84.396", "wer": "15.604", "wps": "74.1", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "29400", "lr": "2.73509e-05", "gnorm": "8.151", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "65410"}
-[2024-06-09 07:38:13,068][train_inner][INFO] - {"epoch": 3, "update": 2.341, "loss": "0.995", "ntokens": "127.46", "acc_total": "127.46", "n_correct": "107.625", "wer_total": "127.46", "n_error": "19.835", "ppl": "1.99", "accuracy": "84.438", "wer": "15.562", "wps": "73.9", "ups": "0.58", "wpb": "127.5", "bsz": "8", "num_updates": "29600", "lr": "2.65436e-05", "gnorm": "7.934", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "65755"}
-[2024-06-09 07:43:58,331][train_inner][INFO] - {"epoch": 3, "update": 2.357, "loss": "0.988", "ntokens": "128.71", "acc_total": "128.71", "n_correct": "108.83", "wer_total": "128.71", "n_error": "19.88", "ppl": "1.98", "accuracy": "84.554", "wer": "15.446", "wps": "74.6", "ups": "0.58", "wpb": "128.7", "bsz": "8", "num_updates": "29800", "lr": "2.57603e-05", "gnorm": "8.195", "loss_scale": "512", "train_wall": "344", "gb_free": "6.5", "wall": "66100"}
-[2024-06-09 07:49:43,841][train_inner][INFO] - {"epoch": 3, "update": 2.373, "loss": "0.999", "ntokens": "128.475", "acc_total": "128.475", "n_correct": "108.655", "wer_total": "128.475", "n_error": "19.82", "ppl": "2", "accuracy": "84.573", "wer": "15.427", "wps": "74.4", "ups": "0.58", "wpb": "128.5", "bsz": "8", "num_updates": "30000", "lr": "2.5e-05", "gnorm": "7.976", "loss_scale": "512", "train_wall": "345", "gb_free": "6.5", "wall": "66446"}
-[2024-06-09 07:49:43,842][fairseq_cli.train][INFO] - Stopping training due to num_updates: 30000 >= max_update: 30000
-[2024-06-09 07:49:43,842][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-09 08:11:00,907][valid][INFO] - {"epoch": 3, "valid_loss": "1.2", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "12.7345", "valid_wer_total": "15.5723", "valid_n_error": "2.83716", "valid_ppl": "2.3", "valid_accuracy": "81.777", "valid_wer": "18.219", "valid_wps": "154.4", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "81.943"}
-[2024-06-09 08:11:00,907][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 30000 updates
-[2024-06-09 08:11:00,907][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_3_30000.pt
-[2024-06-09 08:11:04,046][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_3_30000.pt
-[2024-06-09 08:11:06,383][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_3_30000.pt (epoch 3 @ 30000 updates, score 81.777) (writing took 5.4761370189953595 seconds)
-[2024-06-09 08:11:06,640][fairseq_cli.train][INFO] - end of epoch 3 (average epoch stats below)
-[2024-06-09 08:11:06,653][train][INFO] - {"epoch": 3, "train_loss": "1.016", "train_ntokens": "127.773", "train_acc_total": "127.773", "train_n_correct": "107.664", "train_wer_total": "127.773", "train_n_error": "20.104", "train_ppl": "2.02", "train_accuracy": "84.262", "train_wer": "15.734", "train_wps": "56.3", "train_ups": "0.44", "train_wpb": "127.8", "train_bsz": "8", "train_num_updates": "30000", "train_lr": "2.5e-05", "train_gnorm": "8.002", "train_loss_scale": "512", "train_train_wall": "8113", "train_gb_free": "6.5", "train_wall": "67729"}
-[2024-06-09 08:11:06,654][fairseq_cli.train][INFO] - done training in 67728.1 seconds
+[2024-06-14 02:11:14,947][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
+[2024-06-14 02:11:14,947][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
+[2024-06-14 02:11:19,040][fairseq.trainer][INFO] - begin training epoch 1
+[2024-06-14 02:11:19,040][fairseq_cli.train][INFO] - Start iterating over samples
+[2024-06-14 02:16:38,888][train_inner][INFO] - {"epoch": 1, "update": 0.013, "loss": "7.663", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "18.12", "wer_total": "126.725", "n_error": "108.515", "ppl": "202.63", "accuracy": "14.299", "wer": "85.63", "wps": "79.3", "ups": "0.63", "wpb": "126.7", "bsz": "8", "num_updates": "200", "lr": "1.49e-05", "gnorm": "8.828", "loss_scale": "128", "train_wall": "319", "gb_free": "7.1", "wall": "325"}
+[2024-06-14 02:22:00,472][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "6.202", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "25.775", "wer_total": "126.93", "n_error": "100.945", "ppl": "73.59", "accuracy": "20.306", "wer": "79.528", "wps": "78.9", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "400", "lr": "2.48e-05", "gnorm": "3.682", "loss_scale": "128", "train_wall": "321", "gb_free": "7.1", "wall": "647"}
+[2024-06-14 02:27:21,699][train_inner][INFO] - {"epoch": 1, "update": 0.04, "loss": "6.076", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "28.52", "wer_total": "127.015", "n_error": "98.19", "ppl": "67.45", "accuracy": "22.454", "wer": "77.306", "wps": "79.1", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "600", "lr": "3.47e-05", "gnorm": "3.928", "loss_scale": "128", "train_wall": "321", "gb_free": "7.1", "wall": "968"}
+[2024-06-14 02:32:43,127][train_inner][INFO] - {"epoch": 1, "update": 0.053, "loss": "5.873", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "30.32", "wer_total": "126.865", "n_error": "96.315", "ppl": "58.61", "accuracy": "23.899", "wer": "75.919", "wps": "78.9", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "800", "lr": "4.46e-05", "gnorm": "4.054", "loss_scale": "128", "train_wall": "321", "gb_free": "7.1", "wall": "1289"}
+[2024-06-14 02:38:04,259][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "5.939", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "30.49", "wer_total": "127.025", "n_error": "96.27", "ppl": "61.33", "accuracy": "24.003", "wer": "75.788", "wps": "79.1", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "1000", "lr": "5.45e-05", "gnorm": "3.828", "loss_scale": "128", "train_wall": "320", "gb_free": "7.1", "wall": "1610"}
+[2024-06-14 02:43:25,537][train_inner][INFO] - {"epoch": 1, "update": 0.08, "loss": "5.894", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "30.73", "wer_total": "127.095", "n_error": "96.16", "ppl": "59.45", "accuracy": "24.179", "wer": "75.66", "wps": "79.1", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "1200", "lr": "6.44e-05", "gnorm": "3.592", "loss_scale": "128", "train_wall": "321", "gb_free": "7.1", "wall": "1932"}
+[2024-06-14 02:48:46,859][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "5.738", "ntokens": "127.62", "acc_total": "127.62", "n_correct": "31.95", "wer_total": "127.62", "n_error": "95.405", "ppl": "53.36", "accuracy": "25.035", "wer": "74.757", "wps": "79.4", "ups": "0.62", "wpb": "127.6", "bsz": "8", "num_updates": "1400", "lr": "7.43e-05", "gnorm": "3.426", "loss_scale": "128", "train_wall": "321", "gb_free": "7.1", "wall": "2253"}
+[2024-06-14 02:54:08,017][train_inner][INFO] - {"epoch": 1, "update": 0.106, "loss": "5.753", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "32.335", "wer_total": "127.41", "n_error": "94.77", "ppl": "53.94", "accuracy": "25.379", "wer": "74.382", "wps": "79.3", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "1600", "lr": "8.42e-05", "gnorm": "3.149", "loss_scale": "128", "train_wall": "321", "gb_free": "7.1", "wall": "2574"}
+[2024-06-14 02:59:29,100][train_inner][INFO] - {"epoch": 1, "update": 0.119, "loss": "5.797", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "31.895", "wer_total": "126.56", "n_error": "94.43", "ppl": "55.6", "accuracy": "25.201", "wer": "74.613", "wps": "78.8", "ups": "0.62", "wpb": "126.6", "bsz": "8", "num_updates": "1800", "lr": "9.41e-05", "gnorm": "2.923", "loss_scale": "128", "train_wall": "320", "gb_free": "7.1", "wall": "2895"}
+[2024-06-14 03:04:50,457][train_inner][INFO] - {"epoch": 1, "update": 0.133, "loss": "5.711", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "32.95", "wer_total": "126.875", "n_error": "93.745", "ppl": "52.38", "accuracy": "25.97", "wer": "73.888", "wps": "79", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "2000", "lr": "0.000104", "gnorm": "2.729", "loss_scale": "128", "train_wall": "321", "gb_free": "7.1", "wall": "3217"}
+[2024-06-14 03:10:11,583][train_inner][INFO] - {"epoch": 1, "update": 0.146, "loss": "5.668", "ntokens": "128.2", "acc_total": "128.2", "n_correct": "33.575", "wer_total": "128.2", "n_error": "94.315", "ppl": "50.85", "accuracy": "26.19", "wer": "73.569", "wps": "79.8", "ups": "0.62", "wpb": "128.2", "bsz": "8", "num_updates": "2200", "lr": "0.0001139", "gnorm": "2.573", "loss_scale": "256", "train_wall": "320", "gb_free": "7.1", "wall": "3538"}
+[2024-06-14 03:15:32,909][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "5.616", "ntokens": "127.775", "acc_total": "127.775", "n_correct": "34.58", "wer_total": "127.775", "n_error": "92.945", "ppl": "49.03", "accuracy": "27.063", "wer": "72.741", "wps": "79.5", "ups": "0.62", "wpb": "127.8", "bsz": "8", "num_updates": "2400", "lr": "0.0001238", "gnorm": "2.525", "loss_scale": "256", "train_wall": "321", "gb_free": "7.1", "wall": "3859"}
+[2024-06-14 03:18:13,586][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 03:58:16,640][valid][INFO] - {"epoch": 1, "valid_loss": "5.458", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "5.18491", "valid_wer_total": "18.1585", "valid_n_error": "12.946", "valid_ppl": "43.95", "valid_accuracy": "28.554", "valid_wer": "71.295", "valid_wps": "181.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "2500"}
+[2024-06-14 03:58:16,641][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 2500 updates
+[2024-06-14 03:58:16,641][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_2500.pt
+[2024-06-14 03:58:19,677][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_2500.pt
+[2024-06-14 03:58:22,240][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_2500.pt (epoch 1 @ 2500 updates, score 28.554) (writing took 5.599066474999745 seconds)
+[2024-06-14 04:01:02,613][train_inner][INFO] - {"epoch": 1, "update": 0.172, "loss": "5.519", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "35.845", "wer_total": "126.92", "n_error": "90.835", "ppl": "45.86", "accuracy": "28.242", "wer": "71.569", "wps": "9.3", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "2600", "lr": "0.0001337", "gnorm": "2.665", "loss_scale": "256", "train_wall": "320", "gb_free": "7.1", "wall": "6589"}
+[2024-06-14 04:06:23,973][train_inner][INFO] - {"epoch": 1, "update": 0.186, "loss": "5.471", "ntokens": "125.685", "acc_total": "125.685", "n_correct": "36.82", "wer_total": "125.685", "n_error": "88.58", "ppl": "44.35", "accuracy": "29.295", "wer": "70.478", "wps": "78.2", "ups": "0.62", "wpb": "125.7", "bsz": "8", "num_updates": "2800", "lr": "0.0001436", "gnorm": "2.872", "loss_scale": "256", "train_wall": "321", "gb_free": "7.1", "wall": "6910"}
+[2024-06-14 04:11:45,090][train_inner][INFO] - {"epoch": 1, "update": 0.199, "loss": "5.297", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "40.425", "wer_total": "127.19", "n_error": "86.5", "ppl": "39.31", "accuracy": "31.783", "wer": "68.008", "wps": "79.2", "ups": "0.62", "wpb": "127.2", "bsz": "8", "num_updates": "3000", "lr": "0.0001535", "gnorm": "3.13", "loss_scale": "256", "train_wall": "320", "gb_free": "7.1", "wall": "7231"}
+[2024-06-14 04:17:06,261][train_inner][INFO] - {"epoch": 1, "update": 0.212, "loss": "5.043", "ntokens": "126.535", "acc_total": "126.535", "n_correct": "43.655", "wer_total": "126.535", "n_error": "82.61", "ppl": "32.97", "accuracy": "34.5", "wer": "65.286", "wps": "78.8", "ups": "0.62", "wpb": "126.5", "bsz": "8", "num_updates": "3200", "lr": "0.0001634", "gnorm": "3.568", "loss_scale": "256", "train_wall": "321", "gb_free": "7.1", "wall": "7552"}
+[2024-06-14 04:22:27,462][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "4.874", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "46.585", "wer_total": "126.53", "n_error": "79.7", "ppl": "29.33", "accuracy": "36.817", "wer": "62.989", "wps": "78.8", "ups": "0.62", "wpb": "126.5", "bsz": "8", "num_updates": "3400", "lr": "0.0001733", "gnorm": "3.638", "loss_scale": "256", "train_wall": "321", "gb_free": "7.1", "wall": "7874"}
+[2024-06-14 04:27:48,556][train_inner][INFO] - {"epoch": 1, "update": 0.239, "loss": "4.646", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "49.77", "wer_total": "127.025", "n_error": "77.045", "ppl": "25.03", "accuracy": "39.181", "wer": "60.653", "wps": "79.1", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "3600", "lr": "0.0001832", "gnorm": "3.879", "loss_scale": "256", "train_wall": "320", "gb_free": "7.1", "wall": "8195"}
+[2024-06-14 04:33:09,903][train_inner][INFO] - {"epoch": 1, "update": 0.252, "loss": "4.44", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "52.245", "wer_total": "127.35", "n_error": "74.915", "ppl": "21.7", "accuracy": "41.025", "wer": "58.826", "wps": "79.3", "ups": "0.62", "wpb": "127.3", "bsz": "8", "num_updates": "3800", "lr": "0.0001931", "gnorm": "3.969", "loss_scale": "256", "train_wall": "321", "gb_free": "7.1", "wall": "8516"}
+[2024-06-14 04:38:31,134][train_inner][INFO] - {"epoch": 1, "update": 0.265, "loss": "4.345", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "54.015", "wer_total": "127.785", "n_error": "73.62", "ppl": "20.32", "accuracy": "42.27", "wer": "57.612", "wps": "79.6", "ups": "0.62", "wpb": "127.8", "bsz": "8", "num_updates": "4000", "lr": "0.000203", "gnorm": "4.001", "loss_scale": "256", "train_wall": "321", "gb_free": "7.1", "wall": "8837"}
+[2024-06-14 04:43:52,379][train_inner][INFO] - {"epoch": 1, "update": 0.278, "loss": "4.182", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "55.51", "wer_total": "126.255", "n_error": "70.575", "ppl": "18.15", "accuracy": "43.967", "wer": "55.899", "wps": "78.6", "ups": "0.62", "wpb": "126.3", "bsz": "8", "num_updates": "4200", "lr": "0.0002129", "gnorm": "4.117", "loss_scale": "512", "train_wall": "321", "gb_free": "7.1", "wall": "9158"}
+[2024-06-14 04:49:13,387][train_inner][INFO] - {"epoch": 1, "update": 0.292, "loss": "4.06", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "56.685", "wer_total": "125.9", "n_error": "69.105", "ppl": "16.67", "accuracy": "45.024", "wer": "54.889", "wps": "78.4", "ups": "0.62", "wpb": "125.9", "bsz": "8", "num_updates": "4400", "lr": "0.0002228", "gnorm": "4.06", "loss_scale": "512", "train_wall": "320", "gb_free": "7.1", "wall": "9479"}
+[2024-06-14 04:54:34,569][train_inner][INFO] - {"epoch": 1, "update": 0.305, "loss": "3.877", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "60.025", "wer_total": "127.885", "n_error": "67.74", "ppl": "14.7", "accuracy": "46.937", "wer": "52.969", "wps": "79.6", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "4600", "lr": "0.0002327", "gnorm": "4.073", "loss_scale": "512", "train_wall": "321", "gb_free": "7.1", "wall": "9801"}
+[2024-06-14 04:59:55,845][train_inner][INFO] - {"epoch": 1, "update": 0.318, "loss": "3.794", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "59.965", "wer_total": "126.21", "n_error": "66.105", "ppl": "13.87", "accuracy": "47.512", "wer": "52.377", "wps": "78.6", "ups": "0.62", "wpb": "126.2", "bsz": "8", "num_updates": "4800", "lr": "0.0002426", "gnorm": "4.175", "loss_scale": "512", "train_wall": "321", "gb_free": "7.1", "wall": "10122"}
+[2024-06-14 05:05:16,940][train_inner][INFO] - {"epoch": 1, "update": 0.331, "loss": "3.696", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "61.095", "wer_total": "126.87", "n_error": "65.595", "ppl": "12.96", "accuracy": "48.156", "wer": "51.703", "wps": "79", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "5000", "lr": "0.0002525", "gnorm": "4.12", "loss_scale": "512", "train_wall": "320", "gb_free": "7.1", "wall": "10443"}
+[2024-06-14 05:05:16,940][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 05:45:20,546][valid][INFO] - {"epoch": 1, "valid_loss": "3.351", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "9.49029", "valid_wer_total": "18.1585", "valid_n_error": "8.64948", "valid_ppl": "10.21", "valid_accuracy": "52.264", "valid_wer": "47.633", "valid_wps": "181.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "5000", "valid_best_accuracy": "52.264"}
+[2024-06-14 05:45:20,546][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 5000 updates
+[2024-06-14 05:45:20,547][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_5000.pt
+[2024-06-14 05:45:23,523][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_5000.pt
+[2024-06-14 05:45:27,851][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_5000.pt (epoch 1 @ 5000 updates, score 52.264) (writing took 7.305071680000765 seconds)
+[2024-06-14 05:50:48,507][train_inner][INFO] - {"epoch": 1, "update": 0.345, "loss": "3.481", "ntokens": "126.51", "acc_total": "126.51", "n_correct": "63.415", "wer_total": "126.51", "n_error": "62.965", "ppl": "11.17", "accuracy": "50.126", "wer": "49.771", "wps": "9.3", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "5200", "lr": "0.0002624", "gnorm": "4.078", "loss_scale": "512", "train_wall": "320", "gb_free": "7.1", "wall": "13175"}
+[2024-06-14 05:56:09,903][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "3.483", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "64.015", "wer_total": "127.425", "n_error": "63.275", "ppl": "11.18", "accuracy": "50.237", "wer": "49.657", "wps": "79.3", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "5400", "lr": "0.0002723", "gnorm": "4.071", "loss_scale": "512", "train_wall": "321", "gb_free": "7.1", "wall": "13496"}
+[2024-06-14 06:01:31,155][train_inner][INFO] - {"epoch": 1, "update": 0.371, "loss": "3.363", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "65.685", "wer_total": "127.52", "n_error": "61.725", "ppl": "10.29", "accuracy": "51.51", "wer": "48.404", "wps": "79.4", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "5600", "lr": "0.0002822", "gnorm": "4.023", "loss_scale": "512", "train_wall": "321", "gb_free": "7.1", "wall": "13817"}
+[2024-06-14 06:06:52,136][train_inner][INFO] - {"epoch": 1, "update": 0.384, "loss": "3.424", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "65.32", "wer_total": "126.59", "n_error": "61.14", "ppl": "10.73", "accuracy": "51.6", "wer": "48.298", "wps": "78.9", "ups": "0.62", "wpb": "126.6", "bsz": "8", "num_updates": "5800", "lr": "0.0002921", "gnorm": "4.002", "loss_scale": "512", "train_wall": "320", "gb_free": "7.1", "wall": "14138"}
+[2024-06-14 06:12:13,113][train_inner][INFO] - {"epoch": 1, "update": 0.398, "loss": "3.269", "ntokens": "127.395", "acc_total": "127.395", "n_correct": "66.685", "wer_total": "127.395", "n_error": "60.55", "ppl": "9.64", "accuracy": "52.345", "wer": "47.529", "wps": "79.4", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "6000", "lr": "0.000302", "gnorm": "4.063", "loss_scale": "512", "train_wall": "320", "gb_free": "7.1", "wall": "14459"}
+[2024-06-14 06:17:34,074][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "3.166", "ntokens": "126.995", "acc_total": "126.995", "n_correct": "67.805", "wer_total": "126.995", "n_error": "59.055", "ppl": "8.98", "accuracy": "53.392", "wer": "46.502", "wps": "79.1", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "6200", "lr": "0.0003119", "gnorm": "3.971", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "14780"}
+[2024-06-14 06:22:55,219][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "3.222", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "66.96", "wer_total": "126.01", "n_error": "58.945", "ppl": "9.33", "accuracy": "53.139", "wer": "46.778", "wps": "78.5", "ups": "0.62", "wpb": "126", "bsz": "8", "num_updates": "6400", "lr": "0.0003218", "gnorm": "3.992", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "15101"}
+[2024-06-14 06:28:16,341][train_inner][INFO] - {"epoch": 1, "update": 0.437, "loss": "3.137", "ntokens": "126.33", "acc_total": "126.33", "n_correct": "68.01", "wer_total": "126.33", "n_error": "58.195", "ppl": "8.8", "accuracy": "53.835", "wer": "46.066", "wps": "78.7", "ups": "0.62", "wpb": "126.3", "bsz": "8", "num_updates": "6600", "lr": "0.0003317", "gnorm": "4.101", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "15422"}
+[2024-06-14 06:33:37,454][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "3.047", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "69.67", "wer_total": "126.735", "n_error": "56.92", "ppl": "8.27", "accuracy": "54.973", "wer": "44.913", "wps": "78.9", "ups": "0.62", "wpb": "126.7", "bsz": "8", "num_updates": "6800", "lr": "0.0003416", "gnorm": "3.924", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "15744"}
+[2024-06-14 06:38:58,575][train_inner][INFO] - {"epoch": 1, "update": 0.464, "loss": "2.924", "ntokens": "126.98", "acc_total": "126.98", "n_correct": "71.445", "wer_total": "126.98", "n_error": "55.47", "ppl": "7.59", "accuracy": "56.265", "wer": "43.684", "wps": "79.1", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "7000", "lr": "0.0003515", "gnorm": "3.939", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "16065"}
+[2024-06-14 06:44:19,376][train_inner][INFO] - {"epoch": 1, "update": 0.477, "loss": "3.023", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "70.43", "wer_total": "126.97", "n_error": "56.465", "ppl": "8.13", "accuracy": "55.47", "wer": "44.471", "wps": "79.2", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "7200", "lr": "0.0003614", "gnorm": "4.055", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "16385"}
+[2024-06-14 06:49:40,293][train_inner][INFO] - {"epoch": 1, "update": 0.491, "loss": "2.836", "ntokens": "127.45", "acc_total": "127.45", "n_correct": "72.655", "wer_total": "127.45", "n_error": "54.67", "ppl": "7.14", "accuracy": "57.007", "wer": "42.895", "wps": "79.4", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "7400", "lr": "0.0003713", "gnorm": "4.007", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "16706"}
+[2024-06-14 06:52:20,777][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 07:32:21,962][valid][INFO] - {"epoch": 1, "valid_loss": "2.618", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "10.8741", "valid_wer_total": "18.1585", "valid_n_error": "7.26607", "valid_ppl": "6.14", "valid_accuracy": "59.884", "valid_wer": "40.015", "valid_wps": "181.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "7500", "valid_best_accuracy": "59.884"}
+[2024-06-14 07:32:21,962][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 7500 updates
+[2024-06-14 07:32:21,963][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_7500.pt
+[2024-06-14 07:32:24,957][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_7500.pt
+[2024-06-14 07:32:29,242][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_7500.pt (epoch 1 @ 7500 updates, score 59.884) (writing took 7.279796884999087 seconds)
+[2024-06-14 07:35:09,496][train_inner][INFO] - {"epoch": 1, "update": 0.504, "loss": "2.873", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "71.855", "wer_total": "127.32", "n_error": "55.365", "ppl": "7.33", "accuracy": "56.437", "wer": "43.485", "wps": "9.3", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "7600", "lr": "0.0003812", "gnorm": "3.936", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "19436"}
+[2024-06-14 07:40:30,496][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "2.861", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "72.47", "wer_total": "127.11", "n_error": "54.505", "ppl": "7.26", "accuracy": "57.014", "wer": "42.88", "wps": "79.2", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "7800", "lr": "0.0003911", "gnorm": "3.95", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "19757"}
+[2024-06-14 07:45:51,348][train_inner][INFO] - {"epoch": 1, "update": 0.53, "loss": "2.837", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "72.255", "wer_total": "126.875", "n_error": "54.515", "ppl": "7.14", "accuracy": "56.95", "wer": "42.967", "wps": "79.1", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "8000", "lr": "0.000401", "gnorm": "3.847", "loss_scale": "1024", "train_wall": "320", "gb_free": "7.1", "wall": "20077"}
+[2024-06-14 07:51:12,268][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "2.796", "ntokens": "126.05", "acc_total": "126.05", "n_correct": "72.095", "wer_total": "126.05", "n_error": "53.87", "ppl": "6.94", "accuracy": "57.196", "wer": "42.737", "wps": "78.6", "ups": "0.62", "wpb": "126", "bsz": "8", "num_updates": "8200", "lr": "0.0004109", "gnorm": "4.119", "loss_scale": "2048", "train_wall": "320", "gb_free": "7.1", "wall": "20398"}
+[2024-06-14 07:56:33,318][train_inner][INFO] - {"epoch": 1, "update": 0.557, "loss": "2.745", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "73.53", "wer_total": "126.785", "n_error": "53.18", "ppl": "6.71", "accuracy": "57.996", "wer": "41.945", "wps": "79", "ups": "0.62", "wpb": "126.8", "bsz": "8", "num_updates": "8400", "lr": "0.0004208", "gnorm": "4.073", "loss_scale": "2048", "train_wall": "320", "gb_free": "7.1", "wall": "20719"}
+[2024-06-14 08:01:54,552][train_inner][INFO] - {"epoch": 1, "update": 0.57, "loss": "2.734", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "73.22", "wer_total": "126.675", "n_error": "53.345", "ppl": "6.65", "accuracy": "57.801", "wer": "42.112", "wps": "78.9", "ups": "0.62", "wpb": "126.7", "bsz": "8", "num_updates": "8600", "lr": "0.0004307", "gnorm": "4", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "21041"}
+[2024-06-14 08:07:15,778][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "2.73", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "73.93", "wer_total": "127.09", "n_error": "53.035", "ppl": "6.64", "accuracy": "58.171", "wer": "41.73", "wps": "79.1", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "8800", "lr": "0.0004406", "gnorm": "3.929", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "21362"}
+[2024-06-14 08:12:36,835][train_inner][INFO] - {"epoch": 1, "update": 0.597, "loss": "2.722", "ntokens": "127.43", "acc_total": "127.43", "n_correct": "74.655", "wer_total": "127.43", "n_error": "52.675", "ppl": "6.6", "accuracy": "58.585", "wer": "41.336", "wps": "79.4", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "9000", "lr": "0.0004505", "gnorm": "4.029", "loss_scale": "2048", "train_wall": "320", "gb_free": "7.1", "wall": "21683"}
+[2024-06-14 08:17:58,160][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "2.709", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "74.16", "wer_total": "127.145", "n_error": "52.875", "ppl": "6.54", "accuracy": "58.327", "wer": "41.586", "wps": "79.1", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "9200", "lr": "0.0004604", "gnorm": "4.139", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "22004"}
+[2024-06-14 08:23:19,503][train_inner][INFO] - {"epoch": 1, "update": 0.623, "loss": "2.686", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "74.1", "wer_total": "125.995", "n_error": "51.795", "ppl": "6.44", "accuracy": "58.812", "wer": "41.109", "wps": "78.4", "ups": "0.62", "wpb": "126", "bsz": "8", "num_updates": "9400", "lr": "0.0004703", "gnorm": "3.956", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "22326"}
+[2024-06-14 08:28:41,116][train_inner][INFO] - {"epoch": 1, "update": 0.636, "loss": "2.652", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "74.88", "wer_total": "126.87", "n_error": "51.87", "ppl": "6.29", "accuracy": "59.021", "wer": "40.884", "wps": "78.9", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "9600", "lr": "0.0004802", "gnorm": "4.087", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "22647"}
+[2024-06-14 08:34:02,612][train_inner][INFO] - {"epoch": 1, "update": 0.65, "loss": "2.646", "ntokens": "125.68", "acc_total": "125.68", "n_correct": "74.565", "wer_total": "125.68", "n_error": "50.995", "ppl": "6.26", "accuracy": "59.329", "wer": "40.575", "wps": "78.2", "ups": "0.62", "wpb": "125.7", "bsz": "8", "num_updates": "9800", "lr": "0.0004901", "gnorm": "4.092", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "22969"}
+[2024-06-14 08:39:24,348][train_inner][INFO] - {"epoch": 1, "update": 0.663, "loss": "2.634", "ntokens": "127.56", "acc_total": "127.56", "n_correct": "74.92", "wer_total": "127.56", "n_error": "52.56", "ppl": "6.21", "accuracy": "58.733", "wer": "41.204", "wps": "79.3", "ups": "0.62", "wpb": "127.6", "bsz": "8", "num_updates": "10000", "lr": "0.0005", "gnorm": "4.029", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "23290"}
+[2024-06-14 08:39:24,348][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 09:19:32,412][valid][INFO] - {"epoch": 1, "valid_loss": "2.324", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "11.4969", "valid_wer_total": "18.1585", "valid_n_error": "6.64877", "valid_ppl": "5.01", "valid_accuracy": "63.314", "valid_wer": "36.615", "valid_wps": "180.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "10000", "valid_best_accuracy": "63.314"}
+[2024-06-14 09:19:32,413][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 10000 updates
+[2024-06-14 09:19:32,413][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_10000.pt
+[2024-06-14 09:19:35,453][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_10000.pt
+[2024-06-14 09:19:39,637][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_10000.pt (epoch 1 @ 10000 updates, score 63.314) (writing took 7.224571621998621 seconds)
+[2024-06-14 09:25:01,461][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "2.582", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "75.385", "wer_total": "126.605", "n_error": "51.115", "ppl": "5.99", "accuracy": "59.543", "wer": "40.374", "wps": "9.3", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "10200", "lr": "0.000485243", "gnorm": "4.211", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "26028"}
+[2024-06-14 09:30:23,407][train_inner][INFO] - {"epoch": 1, "update": 0.689, "loss": "2.547", "ntokens": "127.295", "acc_total": "127.295", "n_correct": "77.75", "wer_total": "127.295", "n_error": "49.48", "ppl": "5.84", "accuracy": "61.079", "wer": "38.87", "wps": "79.1", "ups": "0.62", "wpb": "127.3", "bsz": "8", "num_updates": "10400", "lr": "0.000470922", "gnorm": "3.986", "loss_scale": "4096", "train_wall": "321", "gb_free": "7.1", "wall": "26349"}
+[2024-06-14 09:31:55,062][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
+[2024-06-14 09:35:46,843][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "2.556", "ntokens": "127.74", "acc_total": "127.74", "n_correct": "78.525", "wer_total": "127.74", "n_error": "49.135", "ppl": "5.88", "accuracy": "61.473", "wer": "38.465", "wps": "79", "ups": "0.62", "wpb": "127.7", "bsz": "8", "num_updates": "10600", "lr": "0.000457024", "gnorm": "3.953", "loss_scale": "2048", "train_wall": "323", "gb_free": "7.1", "wall": "26673"}
+[2024-06-14 09:41:08,789][train_inner][INFO] - {"epoch": 1, "update": 0.716, "loss": "2.538", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "77.71", "wer_total": "127.33", "n_error": "49.565", "ppl": "5.81", "accuracy": "61.03", "wer": "38.926", "wps": "79.1", "ups": "0.62", "wpb": "127.3", "bsz": "8", "num_updates": "10800", "lr": "0.000443536", "gnorm": "4.123", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "26995"}
+[2024-06-14 09:46:30,829][train_inner][INFO] - {"epoch": 1, "update": 0.729, "loss": "2.462", "ntokens": "126.09", "acc_total": "126.09", "n_correct": "77.64", "wer_total": "126.09", "n_error": "48.38", "ppl": "5.51", "accuracy": "61.575", "wer": "38.369", "wps": "78.3", "ups": "0.62", "wpb": "126.1", "bsz": "8", "num_updates": "11000", "lr": "0.000430446", "gnorm": "3.9", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "27317"}
+[2024-06-14 09:51:52,769][train_inner][INFO] - {"epoch": 1, "update": 0.742, "loss": "2.38", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "80.465", "wer_total": "126.625", "n_error": "46.08", "ppl": "5.21", "accuracy": "63.546", "wer": "36.391", "wps": "78.7", "ups": "0.62", "wpb": "126.6", "bsz": "8", "num_updates": "11200", "lr": "0.000417742", "gnorm": "3.964", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "27639"}
+[2024-06-14 09:57:14,548][train_inner][INFO] - {"epoch": 1, "update": 0.756, "loss": "2.412", "ntokens": "125.82", "acc_total": "125.82", "n_correct": "79.87", "wer_total": "125.82", "n_error": "45.86", "ppl": "5.32", "accuracy": "63.48", "wer": "36.449", "wps": "78.2", "ups": "0.62", "wpb": "125.8", "bsz": "8", "num_updates": "11400", "lr": "0.000405413", "gnorm": "4.018", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "27961"}
+[2024-06-14 10:02:36,537][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "2.383", "ntokens": "126.955", "acc_total": "126.955", "n_correct": "81.18", "wer_total": "126.955", "n_error": "45.71", "ppl": "5.22", "accuracy": "63.944", "wer": "36.005", "wps": "78.9", "ups": "0.62", "wpb": "127", "bsz": "8", "num_updates": "11600", "lr": "0.000393448", "gnorm": "3.977", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "28283"}
+[2024-06-14 10:07:58,510][train_inner][INFO] - {"epoch": 1, "update": 0.782, "loss": "2.341", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "83.935", "wer_total": "126.705", "n_error": "42.715", "ppl": "5.07", "accuracy": "66.244", "wer": "33.712", "wps": "78.7", "ups": "0.62", "wpb": "126.7", "bsz": "8", "num_updates": "11800", "lr": "0.000381836", "gnorm": "3.831", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "28605"}
+[2024-06-14 10:13:20,736][train_inner][INFO] - {"epoch": 1, "update": 0.796, "loss": "2.296", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "83.395", "wer_total": "126.675", "n_error": "43.235", "ppl": "4.91", "accuracy": "65.834", "wer": "34.131", "wps": "78.6", "ups": "0.62", "wpb": "126.7", "bsz": "8", "num_updates": "12000", "lr": "0.000370567", "gnorm": "3.637", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "28927"}
+[2024-06-14 10:18:42,875][train_inner][INFO] - {"epoch": 1, "update": 0.809, "loss": "2.322", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "82.93", "wer_total": "126.855", "n_error": "43.86", "ppl": "5", "accuracy": "65.374", "wer": "34.575", "wps": "78.8", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "12200", "lr": "0.000359631", "gnorm": "3.962", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "29249"}
+[2024-06-14 10:23:14,916][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-14 10:24:06,540][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "2.283", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "86.07", "wer_total": "127.055", "n_error": "40.93", "ppl": "4.87", "accuracy": "67.742", "wer": "32.214", "wps": "78.5", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "12400", "lr": "0.000349017", "gnorm": "3.95", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "29573"}
+[2024-06-14 10:26:47,484][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 11:06:56,912][valid][INFO] - {"epoch": 1, "valid_loss": "1.954", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.1943", "valid_wer_total": "18.1585", "valid_n_error": "4.95919", "valid_ppl": "3.87", "valid_accuracy": "72.662", "valid_wer": "27.311", "valid_wps": "180.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "12500", "valid_best_accuracy": "72.662"}
+[2024-06-14 11:06:56,913][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12500 updates
+[2024-06-14 11:06:56,913][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12500.pt
+[2024-06-14 11:06:59,957][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12500.pt
+[2024-06-14 11:07:04,130][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12500.pt (epoch 1 @ 12500 updates, score 72.662) (writing took 7.217422046000138 seconds)
+[2024-06-14 11:09:45,226][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "2.195", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "85.965", "wer_total": "126.58", "n_error": "40.56", "ppl": "4.58", "accuracy": "67.914", "wer": "32.043", "wps": "9.2", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "12600", "lr": "0.000338716", "gnorm": "3.667", "loss_scale": "1024", "train_wall": "321", "gb_free": "7.1", "wall": "32311"}
+[2024-06-14 11:15:07,438][train_inner][INFO] - {"epoch": 1, "update": 0.849, "loss": "2.196", "ntokens": "127.27", "acc_total": "127.27", "n_correct": "85.77", "wer_total": "127.27", "n_error": "41.455", "ppl": "4.58", "accuracy": "67.392", "wer": "32.572", "wps": "79", "ups": "0.62", "wpb": "127.3", "bsz": "8", "num_updates": "12800", "lr": "0.00032872", "gnorm": "3.63", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "32634"}
+[2024-06-14 11:20:29,797][train_inner][INFO] - {"epoch": 1, "update": 0.862, "loss": "2.199", "ntokens": "125.955", "acc_total": "125.955", "n_correct": "82.31", "wer_total": "125.955", "n_error": "43.605", "ppl": "4.59", "accuracy": "65.349", "wer": "34.62", "wps": "78.1", "ups": "0.62", "wpb": "126", "bsz": "8", "num_updates": "13000", "lr": "0.000319018", "gnorm": "3.662", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "32956"}
+[2024-06-14 11:25:52,049][train_inner][INFO] - {"epoch": 1, "update": 0.875, "loss": "2.201", "ntokens": "126.89", "acc_total": "126.89", "n_correct": "85.315", "wer_total": "126.89", "n_error": "41.5", "ppl": "4.6", "accuracy": "67.235", "wer": "32.705", "wps": "78.8", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "13200", "lr": "0.000309603", "gnorm": "3.741", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "33278"}
+[2024-06-14 11:31:14,569][train_inner][INFO] - {"epoch": 1, "update": 0.888, "loss": "2.196", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "83.655", "wer_total": "127.49", "n_error": "43.8", "ppl": "4.58", "accuracy": "65.617", "wer": "34.356", "wps": "79.1", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "13400", "lr": "0.000300466", "gnorm": "3.691", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "33601"}
+[2024-06-14 11:36:36,957][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "2.1", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "89.14", "wer_total": "126.87", "n_error": "37.695", "ppl": "4.29", "accuracy": "70.261", "wer": "29.712", "wps": "78.7", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "13600", "lr": "0.000291598", "gnorm": "3.503", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "33923"}
+[2024-06-14 11:41:59,477][train_inner][INFO] - {"epoch": 1, "update": 0.915, "loss": "2.047", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "88.93", "wer_total": "127.055", "n_error": "38.09", "ppl": "4.13", "accuracy": "69.993", "wer": "29.979", "wps": "78.8", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "13800", "lr": "0.000282992", "gnorm": "3.57", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "34246"}
+[2024-06-14 11:47:21,979][train_inner][INFO] - {"epoch": 1, "update": 0.928, "loss": "2.088", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "87.74", "wer_total": "127.135", "n_error": "39.36", "ppl": "4.25", "accuracy": "69.013", "wer": "30.959", "wps": "78.8", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "14000", "lr": "0.00027464", "gnorm": "3.669", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "34568"}
+[2024-06-14 11:52:44,412][train_inner][INFO] - {"epoch": 1, "update": 0.941, "loss": "2.046", "ntokens": "126.005", "acc_total": "126.005", "n_correct": "86.895", "wer_total": "126.005", "n_error": "39.045", "ppl": "4.13", "accuracy": "68.962", "wer": "30.987", "wps": "78.2", "ups": "0.62", "wpb": "126", "bsz": "8", "num_updates": "14200", "lr": "0.000266535", "gnorm": "3.535", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "34890"}
+[2024-06-14 11:58:06,951][train_inner][INFO] - {"epoch": 1, "update": 0.955, "loss": "2.075", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "88.76", "wer_total": "126.9", "n_error": "38.1", "ppl": "4.21", "accuracy": "69.945", "wer": "30.024", "wps": "78.7", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "14400", "lr": "0.000258668", "gnorm": "3.433", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "35213"}
+[2024-06-14 12:03:29,703][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "2.003", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "88.865", "wer_total": "127.485", "n_error": "38.575", "ppl": "4.01", "accuracy": "69.706", "wer": "30.258", "wps": "79", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "14600", "lr": "0.000251034", "gnorm": "3.685", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "35536"}
+[2024-06-14 12:08:52,167][train_inner][INFO] - {"epoch": 1, "update": 0.981, "loss": "2.031", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "89.14", "wer_total": "127.26", "n_error": "38.095", "ppl": "4.09", "accuracy": "70.046", "wer": "29.935", "wps": "78.9", "ups": "0.62", "wpb": "127.3", "bsz": "8", "num_updates": "14800", "lr": "0.000243626", "gnorm": "3.395", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "35858"}
+[2024-06-14 12:14:14,906][train_inner][INFO] - {"epoch": 1, "update": 0.994, "loss": "1.993", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "88.555", "wer_total": "127.385", "n_error": "38.78", "ppl": "3.98", "accuracy": "69.518", "wer": "30.443", "wps": "78.9", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "15000", "lr": "0.000236435", "gnorm": "3.41", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "36181"}
+[2024-06-14 12:14:14,906][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 12:54:28,315][valid][INFO] - {"epoch": 1, "valid_loss": "1.757", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "12.962", "valid_wer_total": "18.1585", "valid_n_error": "5.19125", "valid_ppl": "3.38", "valid_accuracy": "71.383", "valid_wer": "28.589", "valid_wps": "180.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "72.662"}
+[2024-06-14 12:54:28,316][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15000 updates
+[2024-06-14 12:54:28,316][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_15000.pt
+[2024-06-14 12:54:31,331][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_15000.pt
+[2024-06-14 12:54:33,473][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_15000.pt (epoch 1 @ 15000 updates, score 71.383) (writing took 5.156613411003491 seconds)
+[2024-06-14 12:56:48,613][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 13:37:07,362][valid][INFO] - {"epoch": 1, "valid_loss": "1.736", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "12.9296", "valid_wer_total": "18.1585", "valid_n_error": "5.22222", "valid_ppl": "3.33", "valid_accuracy": "71.204", "valid_wer": "28.759", "valid_wps": "180.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15084", "valid_best_accuracy": "72.662"}
+[2024-06-14 13:37:07,363][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15084 updates
+[2024-06-14 13:37:07,363][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_last.pt
+[2024-06-14 13:37:11,177][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_last.pt
+[2024-06-14 13:37:11,223][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 1 @ 15084 updates, score 71.204) (writing took 3.8604563690023497 seconds)
+[2024-06-14 13:37:11,224][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below)
+[2024-06-14 13:37:11,225][train][INFO] - {"epoch": 1, "train_loss": "3.48", "train_ntokens": "126.897", "train_acc_total": "126.897", "train_n_correct": "65.1643", "train_wer_total": "126.897", "train_n_error": "61.6043", "train_ppl": "11.16", "train_accuracy": "51.352", "train_wer": "48.547", "train_wps": "46.5", "train_ups": "0.37", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "15084", "train_lr": "0.000233479", "train_gnorm": "3.838", "train_loss_scale": "2048", "train_train_wall": "24201", "train_gb_free": "7.1", "train_wall": "41157"}
+[2024-06-14 13:37:11,260][fairseq.trainer][INFO] - begin training epoch 2
+[2024-06-14 13:37:11,260][fairseq_cli.train][INFO] - Start iterating over samples
+[2024-06-14 13:39:26,804][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-14 13:40:20,267][train_inner][INFO] - {"epoch": 2, "update": 1.008, "loss": "1.912", "ntokens": "127.115", "acc_total": "127.115", "n_correct": "88.015", "wer_total": "127.115", "n_error": "39.05", "ppl": "3.76", "accuracy": "69.24", "wer": "30.72", "wps": "4.9", "ups": "0.04", "wpb": "127.1", "bsz": "8", "num_updates": "15200", "lr": "0.000229457", "gnorm": "3.437", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "41346"}
+[2024-06-14 13:45:43,495][train_inner][INFO] - {"epoch": 2, "update": 1.021, "loss": "1.848", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "88.695", "wer_total": "126.325", "n_error": "37.58", "ppl": "3.6", "accuracy": "70.212", "wer": "29.749", "wps": "78.2", "ups": "0.62", "wpb": "126.3", "bsz": "8", "num_updates": "15400", "lr": "0.000222685", "gnorm": "3.242", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "41670"}
+[2024-06-14 13:51:07,020][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "1.848", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "89.88", "wer_total": "126.915", "n_error": "37.02", "ppl": "3.6", "accuracy": "70.819", "wer": "29.169", "wps": "78.5", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "15600", "lr": "0.000216113", "gnorm": "3.451", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "41993"}
+[2024-06-14 13:56:30,285][train_inner][INFO] - {"epoch": 2, "update": 1.048, "loss": "1.879", "ntokens": "125.545", "acc_total": "125.545", "n_correct": "88.72", "wer_total": "125.545", "n_error": "36.775", "ppl": "3.68", "accuracy": "70.668", "wer": "29.292", "wps": "77.7", "ups": "0.62", "wpb": "125.5", "bsz": "8", "num_updates": "15800", "lr": "0.000209735", "gnorm": "3.268", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "42316"}
+[2024-06-14 14:01:53,729][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "1.854", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "87.38", "wer_total": "127.405", "n_error": "39.96", "ppl": "3.61", "accuracy": "68.584", "wer": "31.365", "wps": "78.8", "ups": "0.62", "wpb": "127.4", "bsz": "8", "num_updates": "16000", "lr": "0.000203545", "gnorm": "3.395", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "42640"}
+[2024-06-14 14:07:16,961][train_inner][INFO] - {"epoch": 2, "update": 1.074, "loss": "1.74", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "90.865", "wer_total": "126.915", "n_error": "36.02", "ppl": "3.34", "accuracy": "71.595", "wer": "28.381", "wps": "78.5", "ups": "0.62", "wpb": "126.9", "bsz": "8", "num_updates": "16200", "lr": "0.000197538", "gnorm": "3.187", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "42963"}
+[2024-06-14 14:12:39,945][train_inner][INFO] - {"epoch": 2, "update": 1.087, "loss": "1.761", "ntokens": "127.14", "acc_total": "127.14", "n_correct": "93.25", "wer_total": "127.14", "n_error": "33.85", "ppl": "3.39", "accuracy": "73.344", "wer": "26.624", "wps": "78.7", "ups": "0.62", "wpb": "127.1", "bsz": "8", "num_updates": "16400", "lr": "0.000191708", "gnorm": "3.414", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "43286"}
+[2024-06-14 14:18:03,006][train_inner][INFO] - {"epoch": 2, "update": 1.101, "loss": "1.756", "ntokens": "127.51", "acc_total": "127.51", "n_correct": "91.695", "wer_total": "127.51", "n_error": "35.785", "ppl": "3.38", "accuracy": "71.912", "wer": "28.064", "wps": "78.9", "ups": "0.62", "wpb": "127.5", "bsz": "8", "num_updates": "16600", "lr": "0.00018605", "gnorm": "3.258", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "43609"}
+[2024-06-14 14:23:26,099][train_inner][INFO] - {"epoch": 2, "update": 1.114, "loss": "1.794", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "91.2", "wer_total": "126.405", "n_error": "35.18", "ppl": "3.47", "accuracy": "72.149", "wer": "27.831", "wps": "78.2", "ups": "0.62", "wpb": "126.4", "bsz": "8", "num_updates": "16800", "lr": "0.000180559", "gnorm": "3.291", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "43932"}
+[2024-06-14 14:28:49,357][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "1.72", "ntokens": "127.925", "acc_total": "127.925", "n_correct": "92.695", "wer_total": "127.925", "n_error": "35.185", "ppl": "3.29", "accuracy": "72.46", "wer": "27.504", "wps": "79.1", "ups": "0.62", "wpb": "127.9", "bsz": "8", "num_updates": "17000", "lr": "0.00017523", "gnorm": "3.12", "loss_scale": "1024", "train_wall": "323", "gb_free": "7.1", "wall": "44255"}
+[2024-06-14 14:34:12,348][train_inner][INFO] - {"epoch": 2, "update": 1.14, "loss": "1.753", "ntokens": "126.125", "acc_total": "126.125", "n_correct": "89.795", "wer_total": "126.125", "n_error": "36.29", "ppl": "3.37", "accuracy": "71.195", "wer": "28.773", "wps": "78.1", "ups": "0.62", "wpb": "126.1", "bsz": "8", "num_updates": "17200", "lr": "0.000170059", "gnorm": "3.194", "loss_scale": "1024", "train_wall": "322", "gb_free": "7.1", "wall": "44578"}
+[2024-06-14 14:39:35,164][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "1.747", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "90.59", "wer_total": "127.245", "n_error": "36.64", "ppl": "3.36", "accuracy": "71.193", "wer": "28.795", "wps": "78.8", "ups": "0.62", "wpb": "127.2", "bsz": "8", "num_updates": "17400", "lr": "0.00016504", "gnorm": "3.108", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "44901"}
+[2024-06-14 14:42:16,522][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 15:22:30,841][valid][INFO] - {"epoch": 2, "valid_loss": "1.617", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.748", "valid_wer_total": "18.1585", "valid_n_error": "4.40734", "valid_ppl": "3.07", "valid_accuracy": "75.711", "valid_wer": "24.272", "valid_wps": "180.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "17500", "valid_best_accuracy": "75.711"}
+[2024-06-14 15:22:30,841][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 17500 updates
+[2024-06-14 15:22:30,842][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_17500.pt
+[2024-06-14 15:22:33,836][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_17500.pt
+[2024-06-14 15:22:38,015][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_17500.pt (epoch 2 @ 17500 updates, score 75.711) (writing took 7.173529824998695 seconds)
+[2024-06-14 15:25:18,860][train_inner][INFO] - {"epoch": 2, "update": 1.167, "loss": "1.76", "ntokens": "126.2", "acc_total": "126.2", "n_correct": "92.05", "wer_total": "126.2", "n_error": "34.115", "ppl": "3.39", "accuracy": "72.94", "wer": "27.032", "wps": "9.2", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "17600", "lr": "0.000160169", "gnorm": "3.214", "loss_scale": "2048", "train_wall": "321", "gb_free": "7.1", "wall": "47645"}
+[2024-06-14 15:30:41,029][train_inner][INFO] - {"epoch": 2, "update": 1.18, "loss": "1.671", "ntokens": "128.29", "acc_total": "128.29", "n_correct": "91.88", "wer_total": "128.29", "n_error": "36.385", "ppl": "3.19", "accuracy": "71.619", "wer": "28.362", "wps": "79.6", "ups": "0.62", "wpb": "128.3", "bsz": "8", "num_updates": "17800", "lr": "0.000155442", "gnorm": "3.117", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "47967"}
+[2024-06-14 15:36:03,206][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "1.741", "ntokens": "126.235", "acc_total": "126.235", "n_correct": "90.35", "wer_total": "126.235", "n_error": "35.86", "ppl": "3.34", "accuracy": "71.573", "wer": "28.407", "wps": "78.4", "ups": "0.62", "wpb": "126.2", "bsz": "8", "num_updates": "18000", "lr": "0.000150854", "gnorm": "3.187", "loss_scale": "2048", "train_wall": "322", "gb_free": "7.1", "wall": "48289"}
+[2024-06-14 15:41:48,625][train_inner][INFO] - {"epoch": 2, "update": 1.207, "loss": "1.773", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "92.245", "wer_total": "127.695", "n_error": "35.42", "ppl": "3.42", "accuracy": "72.239", "wer": "27.738", "wps": "73.9", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "18200", "lr": "0.000146402", "gnorm": "12.911", "loss_scale": "2048", "train_wall": "345", "gb_free": "6.5", "wall": "48635"}
+[2024-06-14 15:47:33,877][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "1.731", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "90.84", "wer_total": "126.255", "n_error": "35.375", "ppl": "3.32", "accuracy": "71.95", "wer": "28.019", "wps": "73.1", "ups": "0.58", "wpb": "126.3", "bsz": "8", "num_updates": "18400", "lr": "0.000142081", "gnorm": "11.585", "loss_scale": "2048", "train_wall": "345", "gb_free": "6.5", "wall": "48980"}
+[2024-06-14 15:53:19,053][train_inner][INFO] - {"epoch": 2, "update": 1.233, "loss": "1.693", "ntokens": "126.825", "acc_total": "126.825", "n_correct": "91.685", "wer_total": "126.825", "n_error": "35.095", "ppl": "3.23", "accuracy": "72.293", "wer": "27.672", "wps": "73.5", "ups": "0.58", "wpb": "126.8", "bsz": "8", "num_updates": "18600", "lr": "0.000137888", "gnorm": "11.119", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "49325"}
+[2024-06-14 15:55:50,855][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-14 15:59:05,701][train_inner][INFO] - {"epoch": 2, "update": 1.246, "loss": "1.629", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "94.465", "wer_total": "127.11", "n_error": "32.61", "ppl": "3.09", "accuracy": "74.318", "wer": "25.655", "wps": "73.3", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "18800", "lr": "0.000133819", "gnorm": "11.503", "loss_scale": "1024", "train_wall": "346", "gb_free": "6.5", "wall": "49672"}
+[2024-06-14 16:04:50,369][train_inner][INFO] - {"epoch": 2, "update": 1.26, "loss": "1.605", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "95.545", "wer_total": "127.695", "n_error": "32.12", "ppl": "3.04", "accuracy": "74.823", "wer": "25.154", "wps": "74.1", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "19000", "lr": "0.000129869", "gnorm": "10.839", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "50016"}
+[2024-06-14 16:10:35,420][train_inner][INFO] - {"epoch": 2, "update": 1.273, "loss": "1.647", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "94.21", "wer_total": "127.145", "n_error": "32.925", "ppl": "3.13", "accuracy": "74.097", "wer": "25.896", "wps": "73.7", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "19200", "lr": "0.000126036", "gnorm": "10.923", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "50361"}
+[2024-06-14 16:16:20,189][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "1.494", "ntokens": "125.81", "acc_total": "125.81", "n_correct": "98.28", "wer_total": "125.81", "n_error": "27.52", "ppl": "2.82", "accuracy": "78.118", "wer": "21.874", "wps": "73", "ups": "0.58", "wpb": "125.8", "bsz": "8", "num_updates": "19400", "lr": "0.000122317", "gnorm": "10.032", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "50706"}
+[2024-06-14 16:22:05,015][train_inner][INFO] - {"epoch": 2, "update": 1.299, "loss": "1.531", "ntokens": "125.62", "acc_total": "125.62", "n_correct": "95.665", "wer_total": "125.62", "n_error": "29.925", "ppl": "2.89", "accuracy": "76.154", "wer": "23.822", "wps": "72.9", "ups": "0.58", "wpb": "125.6", "bsz": "8", "num_updates": "19600", "lr": "0.000118707", "gnorm": "10.38", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "51051"}
+[2024-06-14 16:27:49,770][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "1.473", "ntokens": "127.705", "acc_total": "127.705", "n_correct": "96.465", "wer_total": "127.705", "n_error": "31.2", "ppl": "2.78", "accuracy": "75.537", "wer": "24.431", "wps": "74.1", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "19800", "lr": "0.000115203", "gnorm": "9.946", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "51396"}
+[2024-06-14 16:33:33,031][train_inner][INFO] - {"epoch": 2, "update": 1.326, "loss": "1.521", "ntokens": "126.185", "acc_total": "126.185", "n_correct": "94.905", "wer_total": "126.185", "n_error": "31.265", "ppl": "2.87", "accuracy": "75.211", "wer": "24.777", "wps": "73.5", "ups": "0.58", "wpb": "126.2", "bsz": "8", "num_updates": "20000", "lr": "0.000111803", "gnorm": "10.384", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "51739"}
+[2024-06-14 16:33:33,031][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 17:13:33,615][valid][INFO] - {"epoch": 2, "valid_loss": "1.363", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.2092", "valid_wer_total": "18.1585", "valid_n_error": "3.94689", "valid_ppl": "2.57", "valid_accuracy": "78.251", "valid_wer": "21.736", "valid_wps": "181.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "20000", "valid_best_accuracy": "78.251"}
+[2024-06-14 17:13:33,616][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 20000 updates
+[2024-06-14 17:13:33,616][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_20000.pt
+[2024-06-14 17:13:36,650][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_20000.pt
+[2024-06-14 17:13:40,974][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_20000.pt (epoch 2 @ 20000 updates, score 78.251) (writing took 7.358460363000631 seconds)
+[2024-06-14 17:19:25,333][train_inner][INFO] - {"epoch": 2, "update": 1.339, "loss": "1.518", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "95.38", "wer_total": "126.295", "n_error": "30.895", "ppl": "2.86", "accuracy": "75.522", "wer": "24.463", "wps": "9.2", "ups": "0.07", "wpb": "126.3", "bsz": "8", "num_updates": "20200", "lr": "0.000108504", "gnorm": "10.215", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "54491"}
+[2024-06-14 17:25:09,900][train_inner][INFO] - {"epoch": 2, "update": 1.353, "loss": "1.513", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "96.82", "wer_total": "127.09", "n_error": "30.235", "ppl": "2.85", "accuracy": "76.182", "wer": "23.79", "wps": "73.8", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "20400", "lr": "0.000105301", "gnorm": "10.247", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "54836"}
+[2024-06-14 17:30:54,156][train_inner][INFO] - {"epoch": 2, "update": 1.366, "loss": "1.463", "ntokens": "126.48", "acc_total": "126.48", "n_correct": "97.28", "wer_total": "126.48", "n_error": "29.19", "ppl": "2.76", "accuracy": "76.913", "wer": "23.079", "wps": "73.5", "ups": "0.58", "wpb": "126.5", "bsz": "8", "num_updates": "20600", "lr": "0.000102194", "gnorm": "9.839", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "55180"}
+[2024-06-14 17:36:38,814][train_inner][INFO] - {"epoch": 2, "update": 1.379, "loss": "1.446", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "100.08", "wer_total": "127.35", "n_error": "27.255", "ppl": "2.73", "accuracy": "78.587", "wer": "21.402", "wps": "73.9", "ups": "0.58", "wpb": "127.3", "bsz": "8", "num_updates": "20800", "lr": "9.91776e-05", "gnorm": "9.489", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "55525"}
+[2024-06-14 17:42:23,060][train_inner][INFO] - {"epoch": 2, "update": 1.392, "loss": "1.443", "ntokens": "127.315", "acc_total": "127.315", "n_correct": "98.71", "wer_total": "127.315", "n_error": "28.59", "ppl": "2.72", "accuracy": "77.532", "wer": "22.456", "wps": "74", "ups": "0.58", "wpb": "127.3", "bsz": "8", "num_updates": "21000", "lr": "9.62506e-05", "gnorm": "9.752", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "55869"}
+[2024-06-14 17:48:07,109][train_inner][INFO] - {"epoch": 2, "update": 1.406, "loss": "1.403", "ntokens": "126.745", "acc_total": "126.745", "n_correct": "98.29", "wer_total": "126.745", "n_error": "28.44", "ppl": "2.64", "accuracy": "77.549", "wer": "22.439", "wps": "73.7", "ups": "0.58", "wpb": "126.7", "bsz": "8", "num_updates": "21200", "lr": "9.341e-05", "gnorm": "9.636", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "56213"}
+[2024-06-14 17:53:51,117][train_inner][INFO] - {"epoch": 2, "update": 1.419, "loss": "1.387", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "99.81", "wer_total": "127.32", "n_error": "27.505", "ppl": "2.61", "accuracy": "78.393", "wer": "21.603", "wps": "74", "ups": "0.58", "wpb": "127.3", "bsz": "8", "num_updates": "21400", "lr": "9.06532e-05", "gnorm": "9.174", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "56557"}
+[2024-06-14 17:59:35,428][train_inner][INFO] - {"epoch": 2, "update": 1.432, "loss": "1.404", "ntokens": "126.83", "acc_total": "126.83", "n_correct": "98.755", "wer_total": "126.83", "n_error": "28.045", "ppl": "2.65", "accuracy": "77.864", "wer": "22.112", "wps": "73.7", "ups": "0.58", "wpb": "126.8", "bsz": "8", "num_updates": "21600", "lr": "8.79777e-05", "gnorm": "9.373", "loss_scale": "2048", "train_wall": "344", "gb_free": "6.5", "wall": "56901"}
+[2024-06-14 18:02:44,889][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-14 18:05:21,667][train_inner][INFO] - {"epoch": 2, "update": 1.445, "loss": "1.334", "ntokens": "125.525", "acc_total": "125.525", "n_correct": "99.795", "wer_total": "125.525", "n_error": "25.72", "ppl": "2.52", "accuracy": "79.502", "wer": "20.49", "wps": "72.5", "ups": "0.58", "wpb": "125.5", "bsz": "8", "num_updates": "21800", "lr": "8.53812e-05", "gnorm": "9.312", "loss_scale": "1024", "train_wall": "346", "gb_free": "6.5", "wall": "57248"}
+[2024-06-14 18:11:06,400][train_inner][INFO] - {"epoch": 2, "update": 1.459, "loss": "1.429", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "99.39", "wer_total": "127.64", "n_error": "28.245", "ppl": "2.69", "accuracy": "77.867", "wer": "22.129", "wps": "74.1", "ups": "0.58", "wpb": "127.6", "bsz": "8", "num_updates": "22000", "lr": "8.28614e-05", "gnorm": "9.231", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "57592"}
+[2024-06-14 18:16:50,851][train_inner][INFO] - {"epoch": 2, "update": 1.472, "loss": "1.302", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "99.615", "wer_total": "126.515", "n_error": "26.89", "ppl": "2.46", "accuracy": "78.738", "wer": "21.254", "wps": "73.5", "ups": "0.58", "wpb": "126.5", "bsz": "8", "num_updates": "22200", "lr": "8.04159e-05", "gnorm": "8.908", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "57937"}
+[2024-06-14 18:22:34,465][train_inner][INFO] - {"epoch": 2, "update": 1.485, "loss": "1.339", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "100.57", "wer_total": "127.11", "n_error": "26.505", "ppl": "2.53", "accuracy": "79.12", "wer": "20.852", "wps": "74", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "22400", "lr": "7.80425e-05", "gnorm": "9.569", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "58281"}
+[2024-06-14 18:25:26,425][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 19:05:30,701][valid][INFO] - {"epoch": 2, "valid_loss": "1.197", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.8529", "valid_wer_total": "18.1585", "valid_n_error": "3.30375", "valid_ppl": "2.29", "valid_accuracy": "81.796", "valid_wer": "18.194", "valid_wps": "181.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "22500", "valid_best_accuracy": "81.796"}
+[2024-06-14 19:05:30,702][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 22500 updates
+[2024-06-14 19:05:30,702][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_22500.pt
+[2024-06-14 19:05:33,722][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_22500.pt
+[2024-06-14 19:05:38,210][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_22500.pt (epoch 2 @ 22500 updates, score 81.796) (writing took 7.508525391000148 seconds)
+[2024-06-14 19:08:29,897][train_inner][INFO] - {"epoch": 2, "update": 1.498, "loss": "1.39", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "99.835", "wer_total": "126.78", "n_error": "26.935", "ppl": "2.62", "accuracy": "78.747", "wer": "21.245", "wps": "9.2", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "22600", "lr": "7.57393e-05", "gnorm": "9.518", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "61036"}
+[2024-06-14 19:14:13,801][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "1.335", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "98.925", "wer_total": "126.625", "n_error": "27.685", "ppl": "2.52", "accuracy": "78.124", "wer": "21.864", "wps": "73.6", "ups": "0.58", "wpb": "126.6", "bsz": "8", "num_updates": "22800", "lr": "7.3504e-05", "gnorm": "8.917", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "61380"}
+[2024-06-14 19:19:57,750][train_inner][INFO] - {"epoch": 2, "update": 1.525, "loss": "1.379", "ntokens": "126.17", "acc_total": "126.17", "n_correct": "98.37", "wer_total": "126.17", "n_error": "27.78", "ppl": "2.6", "accuracy": "77.966", "wer": "22.018", "wps": "73.4", "ups": "0.58", "wpb": "126.2", "bsz": "8", "num_updates": "23000", "lr": "7.13346e-05", "gnorm": "9.519", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "61724"}
+[2024-06-14 19:25:41,968][train_inner][INFO] - {"epoch": 2, "update": 1.538, "loss": "1.316", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "102.05", "wer_total": "126.86", "n_error": "24.8", "ppl": "2.49", "accuracy": "80.443", "wer": "19.549", "wps": "73.7", "ups": "0.58", "wpb": "126.9", "bsz": "8", "num_updates": "23200", "lr": "6.92293e-05", "gnorm": "8.932", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "62068"}
+[2024-06-14 19:31:25,732][train_inner][INFO] - {"epoch": 2, "update": 1.551, "loss": "1.251", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "101.85", "wer_total": "126.31", "n_error": "24.455", "ppl": "2.38", "accuracy": "80.635", "wer": "19.361", "wps": "73.5", "ups": "0.58", "wpb": "126.3", "bsz": "8", "num_updates": "23400", "lr": "6.71862e-05", "gnorm": "8.949", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "62412"}
+[2024-06-14 19:37:10,094][train_inner][INFO] - {"epoch": 2, "update": 1.565, "loss": "1.374", "ntokens": "126.645", "acc_total": "126.645", "n_correct": "100.605", "wer_total": "126.645", "n_error": "26.025", "ppl": "2.59", "accuracy": "79.439", "wer": "20.55", "wps": "73.6", "ups": "0.58", "wpb": "126.6", "bsz": "8", "num_updates": "23600", "lr": "6.52033e-05", "gnorm": "9.417", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "62756"}
+[2024-06-14 19:42:54,269][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "1.321", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "101.11", "wer_total": "127.17", "n_error": "26.05", "ppl": "2.5", "accuracy": "79.508", "wer": "20.484", "wps": "73.9", "ups": "0.58", "wpb": "127.2", "bsz": "8", "num_updates": "23800", "lr": "6.3279e-05", "gnorm": "9.008", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "63100"}
+[2024-06-14 19:48:38,062][train_inner][INFO] - {"epoch": 2, "update": 1.591, "loss": "1.306", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "101.005", "wer_total": "126.81", "n_error": "25.805", "ppl": "2.47", "accuracy": "79.651", "wer": "20.349", "wps": "73.8", "ups": "0.58", "wpb": "126.8", "bsz": "8", "num_updates": "24000", "lr": "6.14114e-05", "gnorm": "8.988", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "63444"}
+[2024-06-14 19:54:22,222][train_inner][INFO] - {"epoch": 2, "update": 1.604, "loss": "1.292", "ntokens": "126.145", "acc_total": "126.145", "n_correct": "100.515", "wer_total": "126.145", "n_error": "25.625", "ppl": "2.45", "accuracy": "79.682", "wer": "20.314", "wps": "73.3", "ups": "0.58", "wpb": "126.1", "bsz": "8", "num_updates": "24200", "lr": "5.9599e-05", "gnorm": "8.539", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "63788"}
+[2024-06-14 19:59:01,027][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-14 20:00:08,141][train_inner][INFO] - {"epoch": 2, "update": 1.618, "loss": "1.292", "ntokens": "127.225", "acc_total": "127.225", "n_correct": "101.2", "wer_total": "127.225", "n_error": "26.01", "ppl": "2.45", "accuracy": "79.544", "wer": "20.444", "wps": "73.6", "ups": "0.58", "wpb": "127.2", "bsz": "8", "num_updates": "24400", "lr": "5.784e-05", "gnorm": "8.628", "loss_scale": "1024", "train_wall": "345", "gb_free": "6.5", "wall": "64134"}
+[2024-06-14 20:05:52,304][train_inner][INFO] - {"epoch": 2, "update": 1.631, "loss": "1.327", "ntokens": "127.38", "acc_total": "127.38", "n_correct": "99.245", "wer_total": "127.38", "n_error": "28.13", "ppl": "2.51", "accuracy": "77.913", "wer": "22.084", "wps": "74", "ups": "0.58", "wpb": "127.4", "bsz": "8", "num_updates": "24600", "lr": "5.6133e-05", "gnorm": "8.627", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "64478"}
+[2024-06-14 20:11:36,611][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "1.258", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "100.765", "wer_total": "127.26", "n_error": "26.49", "ppl": "2.39", "accuracy": "79.18", "wer": "20.816", "wps": "73.9", "ups": "0.58", "wpb": "127.3", "bsz": "8", "num_updates": "24800", "lr": "5.44763e-05", "gnorm": "8.642", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "64823"}
+[2024-06-14 20:17:21,039][train_inner][INFO] - {"epoch": 2, "update": 1.658, "loss": "1.296", "ntokens": "125.92", "acc_total": "125.92", "n_correct": "99.05", "wer_total": "125.92", "n_error": "26.845", "ppl": "2.45", "accuracy": "78.661", "wer": "21.319", "wps": "73.1", "ups": "0.58", "wpb": "125.9", "bsz": "8", "num_updates": "25000", "lr": "5.28686e-05", "gnorm": "8.721", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "65167"}
+[2024-06-14 20:17:21,039][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 20:57:25,423][valid][INFO] - {"epoch": 2, "valid_loss": "1.111", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.0544", "valid_wer_total": "18.1585", "valid_n_error": "3.10279", "valid_ppl": "2.16", "valid_accuracy": "82.906", "valid_wer": "17.087", "valid_wps": "181.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "25000", "valid_best_accuracy": "82.906"}
+[2024-06-14 20:57:25,424][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25000 updates
+[2024-06-14 20:57:25,424][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_25000.pt
+[2024-06-14 20:57:28,425][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_25000.pt
+[2024-06-14 20:57:32,815][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_25000.pt (epoch 2 @ 25000 updates, score 82.906) (writing took 7.3910979380016215 seconds)
+[2024-06-14 21:03:16,863][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "1.207", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "101.965", "wer_total": "127.44", "n_error": "25.465", "ppl": "2.31", "accuracy": "80.01", "wer": "19.982", "wps": "9.2", "ups": "0.07", "wpb": "127.4", "bsz": "8", "num_updates": "25200", "lr": "5.13083e-05", "gnorm": "8.557", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "67923"}
+[2024-06-14 21:09:00,403][train_inner][INFO] - {"epoch": 2, "update": 1.684, "loss": "1.253", "ntokens": "125.36", "acc_total": "125.36", "n_correct": "99.435", "wer_total": "125.36", "n_error": "25.91", "ppl": "2.38", "accuracy": "79.32", "wer": "20.668", "wps": "73", "ups": "0.58", "wpb": "125.4", "bsz": "8", "num_updates": "25400", "lr": "4.9794e-05", "gnorm": "8.557", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "68266"}
+[2024-06-14 21:14:43,945][train_inner][INFO] - {"epoch": 2, "update": 1.697, "loss": "1.196", "ntokens": "127.07", "acc_total": "127.07", "n_correct": "101.74", "wer_total": "127.07", "n_error": "25.33", "ppl": "2.29", "accuracy": "80.066", "wer": "19.934", "wps": "74", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "25600", "lr": "4.83244e-05", "gnorm": "8.41", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "68610"}
+[2024-06-14 21:20:27,651][train_inner][INFO] - {"epoch": 2, "update": 1.711, "loss": "1.207", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "102.295", "wer_total": "127.055", "n_error": "24.75", "ppl": "2.31", "accuracy": "80.512", "wer": "19.48", "wps": "73.9", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "25800", "lr": "4.68982e-05", "gnorm": "8.572", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "68954"}
+[2024-06-14 21:26:11,243][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "1.261", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "102.75", "wer_total": "126.935", "n_error": "24.175", "ppl": "2.4", "accuracy": "80.947", "wer": "19.045", "wps": "73.9", "ups": "0.58", "wpb": "126.9", "bsz": "8", "num_updates": "26000", "lr": "4.55141e-05", "gnorm": "8.532", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "69297"}
+[2024-06-14 21:31:54,867][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "1.24", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "102.295", "wer_total": "126.305", "n_error": "23.995", "ppl": "2.36", "accuracy": "80.99", "wer": "18.998", "wps": "73.5", "ups": "0.58", "wpb": "126.3", "bsz": "8", "num_updates": "26200", "lr": "4.41708e-05", "gnorm": "8.581", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "69641"}
+[2024-06-14 21:37:38,618][train_inner][INFO] - {"epoch": 2, "update": 1.75, "loss": "1.219", "ntokens": "127.47", "acc_total": "127.47", "n_correct": "102.62", "wer_total": "127.47", "n_error": "24.845", "ppl": "2.33", "accuracy": "80.505", "wer": "19.491", "wps": "74.2", "ups": "0.58", "wpb": "127.5", "bsz": "8", "num_updates": "26400", "lr": "4.28672e-05", "gnorm": "8.518", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "69985"}
+[2024-06-14 21:43:22,002][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "1.183", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "103.87", "wer_total": "126.6", "n_error": "22.72", "ppl": "2.27", "accuracy": "82.046", "wer": "17.946", "wps": "73.7", "ups": "0.58", "wpb": "126.6", "bsz": "8", "num_updates": "26600", "lr": "4.16021e-05", "gnorm": "8.669", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "70328"}
+[2024-06-14 21:49:05,848][train_inner][INFO] - {"epoch": 2, "update": 1.777, "loss": "1.172", "ntokens": "126.565", "acc_total": "126.565", "n_correct": "104.155", "wer_total": "126.565", "n_error": "22.405", "ppl": "2.25", "accuracy": "82.294", "wer": "17.702", "wps": "73.6", "ups": "0.58", "wpb": "126.6", "bsz": "8", "num_updates": "26800", "lr": "4.03743e-05", "gnorm": "8.51", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "70672"}
+[2024-06-14 21:54:49,176][train_inner][INFO] - {"epoch": 2, "update": 1.79, "loss": "1.235", "ntokens": "127.69", "acc_total": "127.69", "n_correct": "103.79", "wer_total": "127.69", "n_error": "23.885", "ppl": "2.35", "accuracy": "81.283", "wer": "18.705", "wps": "74.4", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "27000", "lr": "3.91827e-05", "gnorm": "8.762", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "71015"}
+[2024-06-14 21:58:21,990][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-14 22:00:34,225][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "1.228", "ntokens": "127.93", "acc_total": "127.93", "n_correct": "104.29", "wer_total": "127.93", "n_error": "23.625", "ppl": "2.34", "accuracy": "81.521", "wer": "18.467", "wps": "74.2", "ups": "0.58", "wpb": "127.9", "bsz": "8", "num_updates": "27200", "lr": "3.80263e-05", "gnorm": "8.609", "loss_scale": "1024", "train_wall": "344", "gb_free": "6.5", "wall": "71360"}
+[2024-06-14 22:06:17,734][train_inner][INFO] - {"epoch": 2, "update": 1.817, "loss": "1.229", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "104.255", "wer_total": "126.68", "n_error": "22.42", "ppl": "2.34", "accuracy": "82.298", "wer": "17.698", "wps": "73.8", "ups": "0.58", "wpb": "126.7", "bsz": "8", "num_updates": "27400", "lr": "3.6904e-05", "gnorm": "8.623", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "71704"}
+[2024-06-14 22:09:09,665][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-14 22:49:09,373][valid][INFO] - {"epoch": 2, "valid_loss": "1.056", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2815", "valid_wer_total": "18.1585", "valid_n_error": "2.87591", "valid_ppl": "2.08", "valid_accuracy": "84.156", "valid_wer": "15.838", "valid_wps": "181.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "27500", "valid_best_accuracy": "84.156"}
+[2024-06-14 22:49:09,374][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 27500 updates
+[2024-06-14 22:49:09,374][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_27500.pt
+[2024-06-14 22:49:12,388][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_27500.pt
+[2024-06-14 22:49:16,744][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_27500.pt (epoch 2 @ 27500 updates, score 84.156) (writing took 7.370070165998186 seconds)
+[2024-06-14 22:52:08,192][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "1.19", "ntokens": "126.45", "acc_total": "126.45", "n_correct": "103.77", "wer_total": "126.45", "n_error": "22.67", "ppl": "2.28", "accuracy": "82.064", "wer": "17.928", "wps": "9.2", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "27600", "lr": "3.58149e-05", "gnorm": "8.327", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "74454"}
+[2024-06-14 22:57:51,752][train_inner][INFO] - {"epoch": 2, "update": 1.843, "loss": "1.206", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "104.605", "wer_total": "127.465", "n_error": "22.845", "ppl": "2.31", "accuracy": "82.066", "wer": "17.923", "wps": "74.2", "ups": "0.58", "wpb": "127.5", "bsz": "8", "num_updates": "27800", "lr": "3.47579e-05", "gnorm": "8.289", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "74798"}
+[2024-06-14 23:03:34,915][train_inner][INFO] - {"epoch": 2, "update": 1.856, "loss": "1.219", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "104.09", "wer_total": "127.49", "n_error": "23.4", "ppl": "2.33", "accuracy": "81.646", "wer": "18.354", "wps": "74.3", "ups": "0.58", "wpb": "127.5", "bsz": "8", "num_updates": "28000", "lr": "3.37321e-05", "gnorm": "8.213", "loss_scale": "1024", "train_wall": "342", "gb_free": "6.5", "wall": "75141"}
+[2024-06-14 23:09:18,155][train_inner][INFO] - {"epoch": 2, "update": 1.87, "loss": "1.169", "ntokens": "127.75", "acc_total": "127.75", "n_correct": "105.03", "wer_total": "127.75", "n_error": "22.715", "ppl": "2.25", "accuracy": "82.215", "wer": "17.781", "wps": "74.4", "ups": "0.58", "wpb": "127.8", "bsz": "8", "num_updates": "28200", "lr": "3.27365e-05", "gnorm": "8.728", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "75484"}
+[2024-06-14 23:15:01,614][train_inner][INFO] - {"epoch": 2, "update": 1.883, "loss": "1.183", "ntokens": "127.125", "acc_total": "127.125", "n_correct": "105.05", "wer_total": "127.125", "n_error": "22.075", "ppl": "2.27", "accuracy": "82.635", "wer": "17.365", "wps": "74", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "28400", "lr": "3.17704e-05", "gnorm": "8.202", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "75828"}
+[2024-06-14 23:20:45,169][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "1.189", "ntokens": "128.015", "acc_total": "128.015", "n_correct": "104.555", "wer_total": "128.015", "n_error": "23.44", "ppl": "2.28", "accuracy": "81.674", "wer": "18.31", "wps": "74.5", "ups": "0.58", "wpb": "128", "bsz": "8", "num_updates": "28600", "lr": "3.08327e-05", "gnorm": "8.457", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "76171"}
+[2024-06-14 23:26:28,919][train_inner][INFO] - {"epoch": 2, "update": 1.91, "loss": "1.164", "ntokens": "126.755", "acc_total": "126.755", "n_correct": "104.86", "wer_total": "126.755", "n_error": "21.885", "ppl": "2.24", "accuracy": "82.727", "wer": "17.266", "wps": "73.7", "ups": "0.58", "wpb": "126.8", "bsz": "8", "num_updates": "28800", "lr": "2.99228e-05", "gnorm": "8.574", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "76515"}
+[2024-06-14 23:32:12,801][train_inner][INFO] - {"epoch": 2, "update": 1.923, "loss": "1.141", "ntokens": "127.74", "acc_total": "127.74", "n_correct": "105.89", "wer_total": "127.74", "n_error": "21.835", "ppl": "2.2", "accuracy": "82.895", "wer": "17.093", "wps": "74.3", "ups": "0.58", "wpb": "127.7", "bsz": "8", "num_updates": "29000", "lr": "2.90397e-05", "gnorm": "8.29", "loss_scale": "1024", "train_wall": "343", "gb_free": "6.5", "wall": "76859"}
+[2024-06-14 23:37:56,620][train_inner][INFO] - {"epoch": 2, "update": 1.936, "loss": "1.196", "ntokens": "126.765", "acc_total": "126.765", "n_correct": "104.415", "wer_total": "126.765", "n_error": "22.35", "ppl": "2.29", "accuracy": "82.369", "wer": "17.631", "wps": "73.7", "ups": "0.58", "wpb": "126.8", "bsz": "8", "num_updates": "29200", "lr": "2.81826e-05", "gnorm": "8.538", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "77203"}
+[2024-06-14 23:43:40,327][train_inner][INFO] - {"epoch": 2, "update": 1.949, "loss": "1.17", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "104.625", "wer_total": "127.17", "n_error": "22.54", "ppl": "2.25", "accuracy": "82.272", "wer": "17.724", "wps": "74", "ups": "0.58", "wpb": "127.2", "bsz": "8", "num_updates": "29400", "lr": "2.73509e-05", "gnorm": "8.513", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "77546"}
+[2024-06-14 23:49:23,679][train_inner][INFO] - {"epoch": 2, "update": 1.963, "loss": "1.152", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "104.55", "wer_total": "127.1", "n_error": "22.54", "ppl": "2.22", "accuracy": "82.258", "wer": "17.734", "wps": "74", "ups": "0.58", "wpb": "127.1", "bsz": "8", "num_updates": "29600", "lr": "2.65436e-05", "gnorm": "8.465", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "77890"}
+[2024-06-14 23:55:06,999][train_inner][INFO] - {"epoch": 2, "update": 1.976, "loss": "1.177", "ntokens": "126.885", "acc_total": "126.885", "n_correct": "104.53", "wer_total": "126.885", "n_error": "22.35", "ppl": "2.26", "accuracy": "82.382", "wer": "17.614", "wps": "73.9", "ups": "0.58", "wpb": "126.9", "bsz": "8", "num_updates": "29800", "lr": "2.57603e-05", "gnorm": "8.515", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "78233"}
+[2024-06-15 00:00:50,507][train_inner][INFO] - {"epoch": 2, "update": 1.989, "loss": "1.181", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "104.595", "wer_total": "126.86", "n_error": "22.265", "ppl": "2.27", "accuracy": "82.449", "wer": "17.551", "wps": "73.9", "ups": "0.58", "wpb": "126.9", "bsz": "8", "num_updates": "30000", "lr": "2.5e-05", "gnorm": "8.347", "loss_scale": "2048", "train_wall": "343", "gb_free": "6.5", "wall": "78577"}
+[2024-06-15 00:00:50,508][fairseq_cli.train][INFO] - Stopping training due to num_updates: 30000 >= max_update: 30000
+[2024-06-15 00:00:50,508][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 00:40:55,217][valid][INFO] - {"epoch": 2, "valid_loss": "1.001", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4322", "valid_wer_total": "18.1585", "valid_n_error": "2.72534", "valid_ppl": "2", "valid_accuracy": "84.986", "valid_wer": "15.009", "valid_wps": "181.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "84.986"}
+[2024-06-15 00:40:55,217][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 30000 updates
+[2024-06-15 00:40:55,217][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_30000.pt
+[2024-06-15 00:40:58,210][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_30000.pt
+[2024-06-15 00:41:02,550][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_30000.pt (epoch 2 @ 30000 updates, score 84.986) (writing took 7.3325579990050755 seconds)
+[2024-06-15 00:41:02,616][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below)
+[2024-06-15 00:41:02,618][train][INFO] - {"epoch": 2, "train_loss": "1.423", "train_ntokens": "126.907", "train_acc_total": "126.907", "train_n_correct": "98.528", "train_wer_total": "126.907", "train_n_error": "28.3609", "train_ppl": "2.68", "train_accuracy": "77.638", "train_wer": "22.348", "train_wps": "47.5", "train_ups": "0.37", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "30000", "train_lr": "2.5e-05", "train_gnorm": "8.053", "train_loss_scale": "2048", "train_train_wall": "25305", "train_gb_free": "6.5", "train_wall": "80989"}
+[2024-06-15 00:41:02,618][fairseq_cli.train][INFO] - done training in 80987.6 seconds