diff --git "a/hydra_train.log" "b/hydra_train.log"
--- "a/hydra_train.log"
+++ "b/hydra_train.log"
@@ -1,10 +1,10 @@
-[2024-06-07 16:45:10,787][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 30000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 18000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 10000, 'hold_steps': 0, 'decay_steps': 20000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 30000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}}
-[2024-06-07 16:45:10,790][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViVSP-LLM_v1.0
-[2024-06-07 16:45:10,790][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True}
-[2024-06-07 16:45:11,990][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViVSP-LLM_v1.0
-[2024-06-07 16:45:11,991][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False}
-[2024-06-07 16:45:11,994][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True}
-[2024-06-07 16:45:17,332][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count(
+[2024-06-15 00:51:54,525][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 30000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 18000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 10000, 'hold_steps': 0, 'decay_steps': 20000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 30000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}}
+[2024-06-15 00:51:54,528][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViVSP-LLM_v1.0
+[2024-06-15 00:51:54,528][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True}
+[2024-06-15 00:51:56,244][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViVSP-LLM_v1.0
+[2024-06-15 00:51:56,244][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False}
+[2024-06-15 00:51:56,247][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True}
+[2024-06-15 00:52:00,287][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count(
   (encoder): HubertEncoderWrapper(
     (w2v_model): AVHubertModel(
       (feature_extractor_audio): SubModel(
@@ -212,730 +212,719 @@
   )
   (avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True)
 )
-[2024-06-07 16:45:17,338][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask
-[2024-06-07 16:45:17,338][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count
-[2024-06-07 16:45:17,338][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss
-[2024-06-07 16:45:17,341][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424)
-[2024-06-07 16:45:17,344][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0)
-[2024-06-07 16:45:17,345][src.vsp_llm_training][INFO] - Using tokenizer
-[2024-06-07 16:45:17,363][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 12663, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=75
-[2024-06-07 16:45:18,222][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/valid.wrd is sequence label. skipped
-[2024-06-07 16:45:18,222][src.vsp_llm_dataset][INFO] - image transform: Compose(
+[2024-06-15 00:52:00,292][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask
+[2024-06-15 00:52:00,292][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count
+[2024-06-15 00:52:00,292][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss
+[2024-06-15 00:52:00,295][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424)
+[2024-06-15 00:52:00,298][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0)
+[2024-06-15 00:52:00,298][src.vsp_llm_training][INFO] - Using tokenizer
+[2024-06-15 00:52:00,337][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76
+[2024-06-15 00:52:00,715][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/valid.wrd is sequence label. skipped
+[2024-06-15 00:52:00,715][src.vsp_llm_dataset][INFO] - image transform: Compose(
     Normalize(mean=0.0, std=255.0)
-    <src.utils_vsp_llm.CenterCrop object at 0x7a4cdc3cf880>
+    <src.utils_vsp_llm.CenterCrop object at 0x79c6c3a19400>
     Normalize(mean=0.421, std=0.165)
 )
-[2024-06-07 16:45:18,222][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
-[2024-06-07 16:45:18,222][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,390][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,391][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,392][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias
-[2024-06-07 16:45:18,393][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,394][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias
-[2024-06-07 16:45:18,395][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias
-[2024-06-07 16:45:18,396][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,397][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,397][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,398][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias
-[2024-06-07 16:45:18,399][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias
-[2024-06-07 16:45:18,399][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
-[2024-06-07 16:45:18,399][fairseq.utils][INFO] - rank   0: capabilities =  8.6  ; total memory = 15.731 GB ; name = NVIDIA RTX A4000                        
-[2024-06-07 16:45:18,399][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
-[2024-06-07 16:45:18,399][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs)
-[2024-06-07 16:45:18,400][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1
-[2024-06-07 16:45:18,400][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt
-[2024-06-07 16:45:18,400][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt
-[2024-06-07 16:45:18,400][fairseq.trainer][INFO] - loading train data for epoch 1
-[2024-06-07 16:45:18,400][src.vsp_llm_training][INFO] - Using tokenizer
-[2024-06-07 16:45:18,544][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 101180, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73
-[2024-06-07 16:45:18,924][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/train.wrd is sequence label. skipped
-[2024-06-07 16:45:18,924][src.vsp_llm_dataset][INFO] - image transform: Compose(
+[2024-06-15 00:52:00,715][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
+[2024-06-15 00:52:00,715][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias
+[2024-06-15 00:52:00,892][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias
+[2024-06-15 00:52:00,893][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,894][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,895][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,896][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,897][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,898][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias
+[2024-06-15 00:52:00,899][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias
+[2024-06-15 00:52:00,900][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias
+[2024-06-15 00:52:00,901][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
+[2024-06-15 00:52:00,901][fairseq.utils][INFO] - rank   0: capabilities =  8.6  ; total memory = 15.729 GB ; name = NVIDIA RTX A4000                        
+[2024-06-15 00:52:00,901][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers***********************
+[2024-06-15 00:52:00,901][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs)
+[2024-06-15 00:52:00,901][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1
+[2024-06-15 00:52:00,901][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt
+[2024-06-15 00:52:00,902][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt
+[2024-06-15 00:52:00,902][fairseq.trainer][INFO] - loading train data for epoch 1
+[2024-06-15 00:52:00,902][src.vsp_llm_training][INFO] - Using tokenizer
+[2024-06-15 00:52:01,070][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 120686, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73
+[2024-06-15 00:52:01,427][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/train.wrd is sequence label. skipped
+[2024-06-15 00:52:01,427][src.vsp_llm_dataset][INFO] - image transform: Compose(
     Normalize(mean=0.0, std=255.0)
     RandomCrop(size=(88, 88))
-    <src.utils_vsp_llm.HorizontalFlip object at 0x7a4c8edf05b0>
+    <src.utils_vsp_llm.HorizontalFlip object at 0x79c6c3f2c430>
     Normalize(mean=0.421, std=0.165)
 )
-[2024-06-07 16:45:18,924][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
-[2024-06-07 16:45:18,925][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
-[2024-06-07 16:45:22,640][fairseq.trainer][INFO] - begin training epoch 1
-[2024-06-07 16:45:22,640][fairseq_cli.train][INFO] - Start iterating over samples
-[2024-06-07 16:50:49,850][train_inner][INFO] - {"epoch": 1, "update": 0.016, "loss": "7.61", "ntokens": "128.135", "acc_total": "128.135", "n_correct": "18.315", "wer_total": "128.135", "n_error": "109.77", "ppl": "195.29", "accuracy": "14.294", "wer": "85.667", "wps": "78.5", "ups": "0.61", "wpb": "128.1", "bsz": "8", "num_updates": "200", "lr": "1.49e-05", "gnorm": "8.857", "loss_scale": "128", "train_wall": "326", "gb_free": "7.1", "wall": "331"}
-[2024-06-07 16:56:18,138][train_inner][INFO] - {"epoch": 1, "update": 0.032, "loss": "6.258", "ntokens": "128.27", "acc_total": "128.27", "n_correct": "25.555", "wer_total": "128.27", "n_error": "102.54", "ppl": "76.52", "accuracy": "19.923", "wer": "79.941", "wps": "78.1", "ups": "0.61", "wpb": "128.3", "bsz": "8", "num_updates": "400", "lr": "2.48e-05", "gnorm": "3.64", "loss_scale": "128", "train_wall": "328", "gb_free": "7.1", "wall": "660"}
-[2024-06-07 17:01:46,404][train_inner][INFO] - {"epoch": 1, "update": 0.047, "loss": "6.078", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "28.44", "wer_total": "127.235", "n_error": "98.495", "ppl": "67.57", "accuracy": "22.352", "wer": "77.412", "wps": "77.5", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "600", "lr": "3.47e-05", "gnorm": "3.554", "loss_scale": "128", "train_wall": "328", "gb_free": "7.1", "wall": "988"}
-[2024-06-07 17:07:13,778][train_inner][INFO] - {"epoch": 1, "update": 0.063, "loss": "5.973", "ntokens": "128.155", "acc_total": "128.155", "n_correct": "30.085", "wer_total": "128.155", "n_error": "97.81", "ppl": "62.83", "accuracy": "23.475", "wer": "76.322", "wps": "78.3", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "800", "lr": "4.46e-05", "gnorm": "3.542", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "1315"}
-[2024-06-07 17:12:41,141][train_inner][INFO] - {"epoch": 1, "update": 0.079, "loss": "5.932", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "29.45", "wer_total": "127.52", "n_error": "97.78", "ppl": "61.06", "accuracy": "23.094", "wer": "76.678", "wps": "77.9", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "1000", "lr": "5.45e-05", "gnorm": "3.455", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "1643"}
-[2024-06-07 17:18:08,537][train_inner][INFO] - {"epoch": 1, "update": 0.095, "loss": "5.901", "ntokens": "127.845", "acc_total": "127.845", "n_correct": "30.395", "wer_total": "127.845", "n_error": "97.215", "ppl": "59.78", "accuracy": "23.775", "wer": "76.041", "wps": "78.1", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "1200", "lr": "6.44e-05", "gnorm": "3.217", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "1970"}
-[2024-06-07 17:23:36,079][train_inner][INFO] - {"epoch": 1, "update": 0.111, "loss": "5.876", "ntokens": "128.09", "acc_total": "128.09", "n_correct": "30.975", "wer_total": "128.09", "n_error": "96.935", "ppl": "58.71", "accuracy": "24.182", "wer": "75.677", "wps": "78.2", "ups": "0.61", "wpb": "128.1", "bsz": "8", "num_updates": "1400", "lr": "7.43e-05", "gnorm": "2.962", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "2298"}
-[2024-06-07 17:29:03,208][train_inner][INFO] - {"epoch": 1, "update": 0.127, "loss": "5.895", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "30.595", "wer_total": "127.155", "n_error": "96.31", "ppl": "59.51", "accuracy": "24.061", "wer": "75.742", "wps": "77.7", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "1600", "lr": "8.42e-05", "gnorm": "2.692", "loss_scale": "128", "train_wall": "326", "gb_free": "7.1", "wall": "2625"}
-[2024-06-07 17:34:30,304][train_inner][INFO] - {"epoch": 1, "update": 0.142, "loss": "5.813", "ntokens": "128.27", "acc_total": "128.27", "n_correct": "31.285", "wer_total": "128.27", "n_error": "96.71", "ppl": "56.22", "accuracy": "24.39", "wer": "75.396", "wps": "78.4", "ups": "0.61", "wpb": "128.3", "bsz": "8", "num_updates": "1800", "lr": "9.41e-05", "gnorm": "2.517", "loss_scale": "128", "train_wall": "326", "gb_free": "7.1", "wall": "2952"}
-[2024-06-07 17:39:57,226][train_inner][INFO] - {"epoch": 1, "update": 0.158, "loss": "5.808", "ntokens": "127.54", "acc_total": "127.54", "n_correct": "31.16", "wer_total": "127.54", "n_error": "96.185", "ppl": "56.03", "accuracy": "24.432", "wer": "75.416", "wps": "78", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "2000", "lr": "0.000104", "gnorm": "2.307", "loss_scale": "128", "train_wall": "326", "gb_free": "7.1", "wall": "3279"}
-[2024-06-07 17:45:24,578][train_inner][INFO] - {"epoch": 1, "update": 0.174, "loss": "5.797", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "31.665", "wer_total": "127.94", "n_error": "96.125", "ppl": "55.59", "accuracy": "24.75", "wer": "75.133", "wps": "78.2", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "2200", "lr": "0.0001139", "gnorm": "2.196", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "3606"}
-[2024-06-07 17:50:51,677][train_inner][INFO] - {"epoch": 1, "update": 0.19, "loss": "5.784", "ntokens": "129.415", "acc_total": "129.415", "n_correct": "32.22", "wer_total": "129.415", "n_error": "97.04", "ppl": "55.09", "accuracy": "24.897", "wer": "74.984", "wps": "79.1", "ups": "0.61", "wpb": "129.4", "bsz": "8", "num_updates": "2400", "lr": "0.0001238", "gnorm": "2.068", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "3933"}
-[2024-06-07 17:53:35,241][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-07 18:15:36,554][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "3.9414", "valid_wer_total": "15.5723", "valid_n_error": "11.6061", "valid_ppl": "nan", "valid_accuracy": "25.31", "valid_wer": "74.53", "valid_wps": "149.2", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "2500"}
-[2024-06-07 18:15:36,555][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 2500 updates
-[2024-06-07 18:15:36,555][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_2500.pt
-[2024-06-07 18:15:39,699][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_2500.pt
-[2024-06-07 18:15:42,456][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_2500.pt (epoch 1 @ 2500 updates, score 25.31) (writing took 5.901058482006192 seconds)
-[2024-06-07 18:18:25,720][train_inner][INFO] - {"epoch": 1, "update": 0.206, "loss": "5.714", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "32.76", "wer_total": "127.155", "n_error": "94.19", "ppl": "52.48", "accuracy": "25.764", "wer": "74.075", "wps": "15.4", "ups": "0.12", "wpb": "127.2", "bsz": "8", "num_updates": "2600", "lr": "0.0001337", "gnorm": "1.962", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "5587"}
-[2024-06-07 18:23:52,981][train_inner][INFO] - {"epoch": 1, "update": 0.221, "loss": "5.727", "ntokens": "129.095", "acc_total": "129.095", "n_correct": "32.93", "wer_total": "129.095", "n_error": "95.995", "ppl": "52.97", "accuracy": "25.508", "wer": "74.36", "wps": "78.9", "ups": "0.61", "wpb": "129.1", "bsz": "8", "num_updates": "2800", "lr": "0.0001436", "gnorm": "1.91", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "5915"}
-[2024-06-07 18:29:20,079][train_inner][INFO] - {"epoch": 1, "update": 0.237, "loss": "5.782", "ntokens": "129.18", "acc_total": "129.18", "n_correct": "32.57", "wer_total": "129.18", "n_error": "96.42", "ppl": "55.03", "accuracy": "25.213", "wer": "74.64", "wps": "79", "ups": "0.61", "wpb": "129.2", "bsz": "8", "num_updates": "3000", "lr": "0.0001535", "gnorm": "1.852", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "6242"}
-[2024-06-07 18:34:47,235][train_inner][INFO] - {"epoch": 1, "update": 0.253, "loss": "5.734", "ntokens": "128.19", "acc_total": "128.19", "n_correct": "32.8", "wer_total": "128.19", "n_error": "95.06", "ppl": "53.23", "accuracy": "25.587", "wer": "74.156", "wps": "78.4", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "3200", "lr": "0.0001634", "gnorm": "1.824", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "6569"}
-[2024-06-07 18:40:14,393][train_inner][INFO] - {"epoch": 1, "update": 0.269, "loss": "5.673", "ntokens": "127.955", "acc_total": "127.955", "n_correct": "33.685", "wer_total": "127.955", "n_error": "94.05", "ppl": "51.03", "accuracy": "26.326", "wer": "73.502", "wps": "78.2", "ups": "0.61", "wpb": "128", "bsz": "8", "num_updates": "3400", "lr": "0.0001733", "gnorm": "1.792", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "6896"}
-[2024-06-07 18:45:41,682][train_inner][INFO] - {"epoch": 1, "update": 0.285, "loss": "5.685", "ntokens": "128.005", "acc_total": "128.005", "n_correct": "33.48", "wer_total": "128.005", "n_error": "94.29", "ppl": "51.44", "accuracy": "26.155", "wer": "73.661", "wps": "78.2", "ups": "0.61", "wpb": "128", "bsz": "8", "num_updates": "3600", "lr": "0.0001832", "gnorm": "1.738", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "7223"}
-[2024-06-07 18:51:08,752][train_inner][INFO] - {"epoch": 1, "update": 0.3, "loss": "5.648", "ntokens": "127.875", "acc_total": "127.875", "n_correct": "33.68", "wer_total": "127.875", "n_error": "93.89", "ppl": "50.13", "accuracy": "26.338", "wer": "73.423", "wps": "78.2", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "3800", "lr": "0.0001931", "gnorm": "1.781", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "7550"}
-[2024-06-07 18:56:36,038][train_inner][INFO] - {"epoch": 1, "update": 0.316, "loss": "5.602", "ntokens": "128.9", "acc_total": "128.9", "n_correct": "34.795", "wer_total": "128.9", "n_error": "93.755", "ppl": "48.59", "accuracy": "26.994", "wer": "72.735", "wps": "78.8", "ups": "0.61", "wpb": "128.9", "bsz": "8", "num_updates": "4000", "lr": "0.000203", "gnorm": "1.815", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "7878"}
-[2024-06-07 19:02:02,905][train_inner][INFO] - {"epoch": 1, "update": 0.332, "loss": "5.565", "ntokens": "127.815", "acc_total": "127.815", "n_correct": "35", "wer_total": "127.815", "n_error": "92.62", "ppl": "47.34", "accuracy": "27.383", "wer": "72.464", "wps": "78.2", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "4200", "lr": "0.0002129", "gnorm": "1.854", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "8205"}
-[2024-06-07 19:07:30,174][train_inner][INFO] - {"epoch": 1, "update": 0.348, "loss": "5.543", "ntokens": "128.615", "acc_total": "128.615", "n_correct": "35.54", "wer_total": "128.615", "n_error": "92.835", "ppl": "46.61", "accuracy": "27.633", "wer": "72.181", "wps": "78.6", "ups": "0.61", "wpb": "128.6", "bsz": "8", "num_updates": "4400", "lr": "0.0002228", "gnorm": "1.858", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "8532"}
-[2024-06-07 19:12:57,355][train_inner][INFO] - {"epoch": 1, "update": 0.364, "loss": "5.465", "ntokens": "128.235", "acc_total": "128.235", "n_correct": "36.76", "wer_total": "128.235", "n_error": "91.23", "ppl": "44.18", "accuracy": "28.666", "wer": "71.143", "wps": "78.4", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "4600", "lr": "0.0002327", "gnorm": "1.925", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "8859"}
-[2024-06-07 19:18:24,542][train_inner][INFO] - {"epoch": 1, "update": 0.38, "loss": "5.46", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "36.495", "wer_total": "127.94", "n_error": "91.215", "ppl": "44", "accuracy": "28.525", "wer": "71.295", "wps": "78.2", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "4800", "lr": "0.0002426", "gnorm": "1.983", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "9186"}
-[2024-06-07 19:23:51,913][train_inner][INFO] - {"epoch": 1, "update": 0.395, "loss": "5.356", "ntokens": "129.36", "acc_total": "129.36", "n_correct": "38.59", "wer_total": "129.36", "n_error": "90.525", "ppl": "40.95", "accuracy": "29.831", "wer": "69.979", "wps": "79", "ups": "0.61", "wpb": "129.4", "bsz": "8", "num_updates": "5000", "lr": "0.0002525", "gnorm": "2.016", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "9514"}
-[2024-06-07 19:23:51,913][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-07 19:45:53,510][valid][INFO] - {"epoch": 1, "valid_loss": "5.353", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "4.51315", "valid_wer_total": "15.5723", "valid_n_error": "11.0188", "valid_ppl": "40.87", "valid_accuracy": "28.982", "valid_wer": "70.759", "valid_wps": "149.2", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "5000", "valid_best_accuracy": "28.982"}
-[2024-06-07 19:45:53,510][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 5000 updates
-[2024-06-07 19:45:53,511][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_5000.pt
-[2024-06-07 19:45:56,662][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_5000.pt
-[2024-06-07 19:46:01,102][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_5000.pt (epoch 1 @ 5000 updates, score 28.982) (writing took 7.591288318042643 seconds)
-[2024-06-07 19:51:28,140][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "5.346", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "37.64", "wer_total": "127.41", "n_error": "89.535", "ppl": "40.66", "accuracy": "29.542", "wer": "70.273", "wps": "15.4", "ups": "0.12", "wpb": "127.4", "bsz": "8", "num_updates": "5200", "lr": "0.0002624", "gnorm": "2.107", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "11170"}
-[2024-06-07 19:56:55,206][train_inner][INFO] - {"epoch": 1, "update": 0.427, "loss": "5.286", "ntokens": "127.99", "acc_total": "127.99", "n_correct": "39.55", "wer_total": "127.99", "n_error": "88.24", "ppl": "39", "accuracy": "30.901", "wer": "68.943", "wps": "78.3", "ups": "0.61", "wpb": "128", "bsz": "8", "num_updates": "5400", "lr": "0.0002723", "gnorm": "2.134", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "11497"}
-[2024-06-07 20:02:22,145][train_inner][INFO] - {"epoch": 1, "update": 0.443, "loss": "5.211", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "39.15", "wer_total": "126.81", "n_error": "87.43", "ppl": "37.04", "accuracy": "30.873", "wer": "68.946", "wps": "77.6", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "5600", "lr": "0.0002822", "gnorm": "2.246", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "11824"}
-[2024-06-07 20:07:49,086][train_inner][INFO] - {"epoch": 1, "update": 0.459, "loss": "5.234", "ntokens": "128.39", "acc_total": "128.39", "n_correct": "40.015", "wer_total": "128.39", "n_error": "88.155", "ppl": "37.63", "accuracy": "31.167", "wer": "68.662", "wps": "78.5", "ups": "0.61", "wpb": "128.4", "bsz": "8", "num_updates": "5800", "lr": "0.0002921", "gnorm": "2.21", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "12151"}
-[2024-06-07 20:13:16,141][train_inner][INFO] - {"epoch": 1, "update": 0.474, "loss": "5.203", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "39.545", "wer_total": "127.425", "n_error": "87.615", "ppl": "36.83", "accuracy": "31.034", "wer": "68.758", "wps": "77.9", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "6000", "lr": "0.000302", "gnorm": "2.247", "loss_scale": "512", "train_wall": "326", "gb_free": "7.1", "wall": "12478"}
-[2024-06-07 20:18:43,034][train_inner][INFO] - {"epoch": 1, "update": 0.49, "loss": "5.116", "ntokens": "127.935", "acc_total": "127.935", "n_correct": "40.6", "wer_total": "127.935", "n_error": "87.155", "ppl": "34.67", "accuracy": "31.735", "wer": "68.124", "wps": "78.3", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "6200", "lr": "0.0003119", "gnorm": "2.318", "loss_scale": "1024", "train_wall": "326", "gb_free": "7.1", "wall": "12805"}
-[2024-06-07 20:24:09,994][train_inner][INFO] - {"epoch": 1, "update": 0.506, "loss": "5.082", "ntokens": "128.395", "acc_total": "128.395", "n_correct": "41.065", "wer_total": "128.395", "n_error": "87.19", "ppl": "33.88", "accuracy": "31.983", "wer": "67.908", "wps": "78.5", "ups": "0.61", "wpb": "128.4", "bsz": "8", "num_updates": "6400", "lr": "0.0003218", "gnorm": "2.346", "loss_scale": "1024", "train_wall": "326", "gb_free": "7.1", "wall": "13132"}
-[2024-06-07 20:29:36,898][train_inner][INFO] - {"epoch": 1, "update": 0.522, "loss": "5.082", "ntokens": "127.92", "acc_total": "127.92", "n_correct": "41.65", "wer_total": "127.92", "n_error": "86.055", "ppl": "33.87", "accuracy": "32.559", "wer": "67.273", "wps": "78.3", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "6600", "lr": "0.0003317", "gnorm": "2.368", "loss_scale": "1024", "train_wall": "326", "gb_free": "7.1", "wall": "13458"}
-[2024-06-07 20:35:03,924][train_inner][INFO] - {"epoch": 1, "update": 0.538, "loss": "5.059", "ntokens": "128.695", "acc_total": "128.695", "n_correct": "41.675", "wer_total": "128.695", "n_error": "86.81", "ppl": "33.33", "accuracy": "32.383", "wer": "67.454", "wps": "78.7", "ups": "0.61", "wpb": "128.7", "bsz": "8", "num_updates": "6800", "lr": "0.0003416", "gnorm": "2.374", "loss_scale": "1024", "train_wall": "326", "gb_free": "7.1", "wall": "13786"}
-[2024-06-07 20:40:30,822][train_inner][INFO] - {"epoch": 1, "update": 0.553, "loss": "4.982", "ntokens": "128.315", "acc_total": "128.315", "n_correct": "42.395", "wer_total": "128.315", "n_error": "85.76", "ppl": "31.6", "accuracy": "33.04", "wer": "66.836", "wps": "78.5", "ups": "0.61", "wpb": "128.3", "bsz": "8", "num_updates": "7000", "lr": "0.0003515", "gnorm": "2.413", "loss_scale": "1024", "train_wall": "326", "gb_free": "7.1", "wall": "14112"}
-[2024-06-07 20:45:58,480][train_inner][INFO] - {"epoch": 1, "update": 0.569, "loss": "4.969", "ntokens": "128.155", "acc_total": "128.155", "n_correct": "42.715", "wer_total": "128.155", "n_error": "85.225", "ppl": "31.31", "accuracy": "33.331", "wer": "66.502", "wps": "78.2", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "7200", "lr": "0.0003614", "gnorm": "2.462", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "14440"}
-[2024-06-07 20:51:26,133][train_inner][INFO] - {"epoch": 1, "update": 0.585, "loss": "4.905", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "43.04", "wer_total": "126.975", "n_error": "83.7", "ppl": "29.96", "accuracy": "33.896", "wer": "65.918", "wps": "77.5", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "7400", "lr": "0.0003713", "gnorm": "2.477", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "14768"}
-[2024-06-07 20:54:09,949][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-07 21:16:12,973][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "4.94093", "valid_wer_total": "15.5723", "valid_n_error": "10.6038", "valid_ppl": "nan", "valid_accuracy": "31.729", "valid_wer": "68.094", "valid_wps": "149.1", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "7500", "valid_best_accuracy": "31.729"}
-[2024-06-07 21:16:12,974][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 7500 updates
-[2024-06-07 21:16:12,974][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_7500.pt
-[2024-06-07 21:16:16,156][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_7500.pt
-[2024-06-07 21:16:20,521][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_7500.pt (epoch 1 @ 7500 updates, score 31.729) (writing took 7.54728168604197 seconds)
-[2024-06-07 21:19:04,114][train_inner][INFO] - {"epoch": 1, "update": 0.601, "loss": "4.905", "ntokens": "128.245", "acc_total": "128.245", "n_correct": "43.09", "wer_total": "128.245", "n_error": "84.95", "ppl": "29.96", "accuracy": "33.6", "wer": "66.24", "wps": "15.5", "ups": "0.12", "wpb": "128.2", "bsz": "8", "num_updates": "7600", "lr": "0.0003812", "gnorm": "2.53", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "16426"}
-[2024-06-07 21:24:32,080][train_inner][INFO] - {"epoch": 1, "update": 0.617, "loss": "4.842", "ntokens": "128.505", "acc_total": "128.505", "n_correct": "44.24", "wer_total": "128.505", "n_error": "84.005", "ppl": "28.69", "accuracy": "34.427", "wer": "65.371", "wps": "78.4", "ups": "0.61", "wpb": "128.5", "bsz": "8", "num_updates": "7800", "lr": "0.0003911", "gnorm": "2.537", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "16754"}
-[2024-06-07 21:30:00,117][train_inner][INFO] - {"epoch": 1, "update": 0.633, "loss": "4.867", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "44.155", "wer_total": "127.52", "n_error": "83.18", "ppl": "29.18", "accuracy": "34.626", "wer": "65.229", "wps": "77.7", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "8000", "lr": "0.000401", "gnorm": "2.66", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "17082"}
-[2024-06-07 21:35:27,895][train_inner][INFO] - {"epoch": 1, "update": 0.648, "loss": "4.866", "ntokens": "128.09", "acc_total": "128.09", "n_correct": "43.57", "wer_total": "128.09", "n_error": "84.34", "ppl": "29.16", "accuracy": "34.015", "wer": "65.844", "wps": "78.2", "ups": "0.61", "wpb": "128.1", "bsz": "8", "num_updates": "8200", "lr": "0.0004109", "gnorm": "2.656", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "17409"}
-[2024-06-07 21:40:55,812][train_inner][INFO] - {"epoch": 1, "update": 0.664, "loss": "4.817", "ntokens": "127.165", "acc_total": "127.165", "n_correct": "44.445", "wer_total": "127.165", "n_error": "82.51", "ppl": "28.19", "accuracy": "34.951", "wer": "64.884", "wps": "77.6", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "8400", "lr": "0.0004208", "gnorm": "2.688", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "17737"}
-[2024-06-07 21:46:23,717][train_inner][INFO] - {"epoch": 1, "update": 0.68, "loss": "4.772", "ntokens": "128.38", "acc_total": "128.38", "n_correct": "45.345", "wer_total": "128.38", "n_error": "82.915", "ppl": "27.33", "accuracy": "35.321", "wer": "64.586", "wps": "78.3", "ups": "0.61", "wpb": "128.4", "bsz": "8", "num_updates": "8600", "lr": "0.0004307", "gnorm": "2.744", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "18065"}
-[2024-06-07 21:51:51,665][train_inner][INFO] - {"epoch": 1, "update": 0.696, "loss": "4.78", "ntokens": "128.605", "acc_total": "128.605", "n_correct": "45.235", "wer_total": "128.605", "n_error": "83.21", "ppl": "27.47", "accuracy": "35.174", "wer": "64.702", "wps": "78.4", "ups": "0.61", "wpb": "128.6", "bsz": "8", "num_updates": "8800", "lr": "0.0004406", "gnorm": "2.781", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "18393"}
-[2024-06-07 21:57:19,778][train_inner][INFO] - {"epoch": 1, "update": 0.712, "loss": "4.748", "ntokens": "128.05", "acc_total": "128.05", "n_correct": "45.45", "wer_total": "128.05", "n_error": "82.43", "ppl": "26.87", "accuracy": "35.494", "wer": "64.373", "wps": "78.1", "ups": "0.61", "wpb": "128.1", "bsz": "8", "num_updates": "9000", "lr": "0.0004505", "gnorm": "2.803", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "18721"}
-[2024-06-07 22:02:47,703][train_inner][INFO] - {"epoch": 1, "update": 0.727, "loss": "4.688", "ntokens": "128.705", "acc_total": "128.705", "n_correct": "45.81", "wer_total": "128.705", "n_error": "82.76", "ppl": "25.78", "accuracy": "35.593", "wer": "64.302", "wps": "78.5", "ups": "0.61", "wpb": "128.7", "bsz": "8", "num_updates": "9200", "lr": "0.0004604", "gnorm": "2.821", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "19049"}
-[2024-06-07 22:08:15,854][train_inner][INFO] - {"epoch": 1, "update": 0.743, "loss": "4.723", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "45.195", "wer_total": "126.76", "n_error": "81.44", "ppl": "26.41", "accuracy": "35.654", "wer": "64.247", "wps": "77.3", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "9400", "lr": "0.0004703", "gnorm": "2.867", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "19377"}
-[2024-06-07 22:13:43,682][train_inner][INFO] - {"epoch": 1, "update": 0.759, "loss": "4.703", "ntokens": "127.53", "acc_total": "127.53", "n_correct": "45.435", "wer_total": "127.53", "n_error": "81.92", "ppl": "26.05", "accuracy": "35.627", "wer": "64.236", "wps": "77.8", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "9600", "lr": "0.0004802", "gnorm": "2.947", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "19705"}
-[2024-06-07 22:19:11,582][train_inner][INFO] - {"epoch": 1, "update": 0.775, "loss": "4.667", "ntokens": "128.81", "acc_total": "128.81", "n_correct": "46.315", "wer_total": "128.81", "n_error": "82.355", "ppl": "25.41", "accuracy": "35.956", "wer": "63.935", "wps": "78.6", "ups": "0.61", "wpb": "128.8", "bsz": "8", "num_updates": "9800", "lr": "0.0004901", "gnorm": "2.929", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "20033"}
-[2024-06-07 22:24:39,510][train_inner][INFO] - {"epoch": 1, "update": 0.791, "loss": "4.694", "ntokens": "128.93", "acc_total": "128.93", "n_correct": "46.84", "wer_total": "128.93", "n_error": "81.915", "ppl": "25.89", "accuracy": "36.33", "wer": "63.534", "wps": "78.6", "ups": "0.61", "wpb": "128.9", "bsz": "8", "num_updates": "10000", "lr": "0.0005", "gnorm": "2.919", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "20361"}
-[2024-06-07 22:24:39,510][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-07 22:46:42,992][valid][INFO] - {"epoch": 1, "valid_loss": "4.66", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "6.01745", "valid_wer_total": "15.5723", "valid_n_error": "9.53929", "valid_ppl": "25.29", "valid_accuracy": "38.642", "valid_wer": "61.258", "valid_wps": "149", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "10000", "valid_best_accuracy": "38.642"}
-[2024-06-07 22:46:42,993][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 10000 updates
-[2024-06-07 22:46:42,993][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_10000.pt
-[2024-06-07 22:46:46,145][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_10000.pt
-[2024-06-07 22:46:50,358][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_10000.pt (epoch 1 @ 10000 updates, score 38.642) (writing took 7.364766117010731 seconds)
-[2024-06-07 22:52:18,120][train_inner][INFO] - {"epoch": 1, "update": 0.806, "loss": "4.668", "ntokens": "128.81", "acc_total": "128.81", "n_correct": "48.495", "wer_total": "128.81", "n_error": "80.21", "ppl": "25.43", "accuracy": "37.648", "wer": "62.27", "wps": "15.5", "ups": "0.12", "wpb": "128.8", "bsz": "8", "num_updates": "10200", "lr": "0.000485243", "gnorm": "2.953", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "22020"}
-[2024-06-07 22:53:44,934][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-07 22:57:47,708][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "4.573", "ntokens": "127.87", "acc_total": "127.87", "n_correct": "47.19", "wer_total": "127.87", "n_error": "80.55", "ppl": "23.8", "accuracy": "36.905", "wer": "62.994", "wps": "77.6", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "10400", "lr": "0.000470922", "gnorm": "3.044", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "22349"}
-[2024-06-07 23:03:15,389][train_inner][INFO] - {"epoch": 1, "update": 0.838, "loss": "4.599", "ntokens": "127.445", "acc_total": "127.445", "n_correct": "46.97", "wer_total": "127.445", "n_error": "80.295", "ppl": "24.24", "accuracy": "36.855", "wer": "63.004", "wps": "77.8", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "10600", "lr": "0.000457024", "gnorm": "3.032", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "22677"}
-[2024-06-07 23:08:43,445][train_inner][INFO] - {"epoch": 1, "update": 0.854, "loss": "4.479", "ntokens": "128.18", "acc_total": "128.18", "n_correct": "48.1", "wer_total": "128.18", "n_error": "79.88", "ppl": "22.31", "accuracy": "37.525", "wer": "62.319", "wps": "78.1", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "10800", "lr": "0.000443536", "gnorm": "3.026", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "23005"}
-[2024-06-07 23:14:11,276][train_inner][INFO] - {"epoch": 1, "update": 0.87, "loss": "4.509", "ntokens": "127.95", "acc_total": "127.95", "n_correct": "48.6", "wer_total": "127.95", "n_error": "79.17", "ppl": "22.77", "accuracy": "37.984", "wer": "61.876", "wps": "78.1", "ups": "0.61", "wpb": "128", "bsz": "8", "num_updates": "11000", "lr": "0.000430446", "gnorm": "3.014", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "23333"}
-[2024-06-07 23:19:39,419][train_inner][INFO] - {"epoch": 1, "update": 0.886, "loss": "4.537", "ntokens": "127.915", "acc_total": "127.915", "n_correct": "48.145", "wer_total": "127.915", "n_error": "79.58", "ppl": "23.21", "accuracy": "37.638", "wer": "62.213", "wps": "78", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "11200", "lr": "0.000417742", "gnorm": "3.062", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "23661"}
-[2024-06-07 23:25:07,435][train_inner][INFO] - {"epoch": 1, "update": 0.901, "loss": "4.449", "ntokens": "127.985", "acc_total": "127.985", "n_correct": "49.135", "wer_total": "127.985", "n_error": "78.695", "ppl": "21.84", "accuracy": "38.391", "wer": "61.488", "wps": "78", "ups": "0.61", "wpb": "128", "bsz": "8", "num_updates": "11400", "lr": "0.000405413", "gnorm": "3.017", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "23989"}
-[2024-06-07 23:30:35,513][train_inner][INFO] - {"epoch": 1, "update": 0.917, "loss": "4.448", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "48.85", "wer_total": "126.795", "n_error": "77.78", "ppl": "21.82", "accuracy": "38.527", "wer": "61.343", "wps": "77.3", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "11600", "lr": "0.000393448", "gnorm": "3.091", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "24317"}
-[2024-06-07 23:36:03,551][train_inner][INFO] - {"epoch": 1, "update": 0.933, "loss": "4.439", "ntokens": "127.205", "acc_total": "127.205", "n_correct": "49.045", "wer_total": "127.205", "n_error": "78.035", "ppl": "21.7", "accuracy": "38.556", "wer": "61.346", "wps": "77.6", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "11800", "lr": "0.000381836", "gnorm": "3.074", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "24645"}
-[2024-06-07 23:41:31,508][train_inner][INFO] - {"epoch": 1, "update": 0.949, "loss": "4.383", "ntokens": "128.37", "acc_total": "128.37", "n_correct": "50.28", "wer_total": "128.37", "n_error": "77.95", "ppl": "20.87", "accuracy": "39.168", "wer": "60.723", "wps": "78.3", "ups": "0.61", "wpb": "128.4", "bsz": "8", "num_updates": "12000", "lr": "0.000370567", "gnorm": "3.042", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "24973"}
-[2024-06-07 23:46:59,514][train_inner][INFO] - {"epoch": 1, "update": 0.965, "loss": "4.422", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "49.59", "wer_total": "127.515", "n_error": "77.765", "ppl": "21.43", "accuracy": "38.89", "wer": "60.985", "wps": "77.8", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "12200", "lr": "0.000359631", "gnorm": "3.153", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "25301"}
-[2024-06-07 23:52:27,452][train_inner][INFO] - {"epoch": 1, "update": 0.98, "loss": "4.306", "ntokens": "130.215", "acc_total": "130.215", "n_correct": "51.88", "wer_total": "130.215", "n_error": "78.22", "ppl": "19.78", "accuracy": "39.842", "wer": "60.07", "wps": "79.4", "ups": "0.61", "wpb": "130.2", "bsz": "8", "num_updates": "12400", "lr": "0.000349017", "gnorm": "3.116", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "25629"}
-[2024-06-07 23:55:11,412][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 00:17:15,549][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "6.1791", "valid_wer_total": "15.5723", "valid_n_error": "9.3755", "valid_ppl": "nan", "valid_accuracy": "39.68", "valid_wer": "60.206", "valid_wps": "148.9", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "12500", "valid_best_accuracy": "39.68"}
-[2024-06-08 00:17:15,550][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12500 updates
-[2024-06-08 00:17:15,550][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12500.pt
-[2024-06-08 00:17:18,780][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12500.pt
-[2024-06-08 00:17:22,886][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12500.pt (epoch 1 @ 12500 updates, score 39.68) (writing took 7.335984774981625 seconds)
-[2024-06-08 00:20:06,629][train_inner][INFO] - {"epoch": 1, "update": 0.996, "loss": "4.315", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "50.5", "wer_total": "127.32", "n_error": "76.695", "ppl": "19.9", "accuracy": "39.664", "wer": "60.238", "wps": "15.3", "ups": "0.12", "wpb": "127.3", "bsz": "8", "num_updates": "12600", "lr": "0.000338716", "gnorm": "3.126", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "27288"}
-[2024-06-08 00:21:22,972][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 00:43:27,220][valid][INFO] - {"epoch": 1, "valid_loss": "4.323", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "6.16868", "valid_wer_total": "15.5723", "valid_n_error": "9.38411", "valid_ppl": "20.01", "valid_accuracy": "39.613", "valid_wer": "60.262", "valid_wps": "148.9", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "12647", "valid_best_accuracy": "39.68"}
-[2024-06-08 00:43:27,221][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12647 updates
-[2024-06-08 00:43:27,221][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_last.pt
-[2024-06-08 00:43:31,069][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_last.pt
-[2024-06-08 00:43:31,137][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 1 @ 12647 updates, score 39.613) (writing took 3.9168170469929464 seconds)
-[2024-06-08 00:43:31,138][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below)
-[2024-06-08 00:43:31,140][train][INFO] - {"epoch": 1, "train_loss": "5.192", "train_ntokens": "128.043", "train_acc_total": "128.043", "train_n_correct": "39.8893", "train_wer_total": "128.043", "train_n_error": "87.9571", "train_ppl": "36.56", "train_accuracy": "31.153", "train_wer": "68.694", "train_wps": "56.4", "train_ups": "0.44", "train_wpb": "128", "train_bsz": "8", "train_num_updates": "12647", "train_lr": "0.00033634", "train_gnorm": "2.678", "train_loss_scale": "4096", "train_train_wall": "20665", "train_gb_free": "7.1", "train_wall": "28693"}
-[2024-06-08 00:43:31,182][fairseq.trainer][INFO] - begin training epoch 2
-[2024-06-08 00:43:31,182][fairseq_cli.train][INFO] - Start iterating over samples
-[2024-06-08 00:47:42,237][train_inner][INFO] - {"epoch": 2, "update": 1.012, "loss": "4.132", "ntokens": "127.975", "acc_total": "127.975", "n_correct": "53.53", "wer_total": "127.975", "n_error": "74.315", "ppl": "17.53", "accuracy": "41.828", "wer": "58.07", "wps": "15.5", "ups": "0.12", "wpb": "128", "bsz": "8", "num_updates": "12800", "lr": "0.00032872", "gnorm": "3.093", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "28944"}
-[2024-06-08 00:53:10,311][train_inner][INFO] - {"epoch": 2, "update": 1.028, "loss": "4.081", "ntokens": "128.185", "acc_total": "128.185", "n_correct": "53.465", "wer_total": "128.185", "n_error": "74.535", "ppl": "16.93", "accuracy": "41.709", "wer": "58.146", "wps": "78.1", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "13000", "lr": "0.000319018", "gnorm": "3.149", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "29272"}
-[2024-06-08 00:53:33,193][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 00:58:39,992][train_inner][INFO] - {"epoch": 2, "update": 1.044, "loss": "4.161", "ntokens": "128.485", "acc_total": "128.485", "n_correct": "53.52", "wer_total": "128.485", "n_error": "74.87", "ppl": "17.89", "accuracy": "41.655", "wer": "58.271", "wps": "77.9", "ups": "0.61", "wpb": "128.5", "bsz": "8", "num_updates": "13200", "lr": "0.000309603", "gnorm": "3.166", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "29602"}
-[2024-06-08 01:04:08,063][train_inner][INFO] - {"epoch": 2, "update": 1.06, "loss": "4.087", "ntokens": "128.945", "acc_total": "128.945", "n_correct": "53.825", "wer_total": "128.945", "n_error": "75.015", "ppl": "17", "accuracy": "41.743", "wer": "58.176", "wps": "78.6", "ups": "0.61", "wpb": "128.9", "bsz": "8", "num_updates": "13400", "lr": "0.000300466", "gnorm": "3.147", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "29930"}
-[2024-06-08 01:09:36,270][train_inner][INFO] - {"epoch": 2, "update": 1.075, "loss": "4.048", "ntokens": "128.04", "acc_total": "128.04", "n_correct": "53.75", "wer_total": "128.04", "n_error": "74.08", "ppl": "16.54", "accuracy": "41.979", "wer": "57.857", "wps": "78", "ups": "0.61", "wpb": "128", "bsz": "8", "num_updates": "13600", "lr": "0.000291598", "gnorm": "3.13", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "30258"}
-[2024-06-08 01:15:04,578][train_inner][INFO] - {"epoch": 2, "update": 1.091, "loss": "4.097", "ntokens": "128.705", "acc_total": "128.705", "n_correct": "53.725", "wer_total": "128.705", "n_error": "74.85", "ppl": "17.11", "accuracy": "41.743", "wer": "58.156", "wps": "78.4", "ups": "0.61", "wpb": "128.7", "bsz": "8", "num_updates": "13800", "lr": "0.000282992", "gnorm": "3.205", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "30586"}
-[2024-06-08 01:20:32,709][train_inner][INFO] - {"epoch": 2, "update": 1.107, "loss": "4.073", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "53.34", "wer_total": "126.77", "n_error": "73.32", "ppl": "16.83", "accuracy": "42.076", "wer": "57.837", "wps": "77.3", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "14000", "lr": "0.00027464", "gnorm": "3.188", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "30914"}
-[2024-06-08 01:26:01,025][train_inner][INFO] - {"epoch": 2, "update": 1.123, "loss": "4.023", "ntokens": "128.03", "acc_total": "128.03", "n_correct": "54.3", "wer_total": "128.03", "n_error": "73.595", "ppl": "16.26", "accuracy": "42.412", "wer": "57.483", "wps": "78", "ups": "0.61", "wpb": "128", "bsz": "8", "num_updates": "14200", "lr": "0.000266535", "gnorm": "3.216", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "31243"}
-[2024-06-08 01:31:29,241][train_inner][INFO] - {"epoch": 2, "update": 1.139, "loss": "4.012", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "54.88", "wer_total": "127.94", "n_error": "72.925", "ppl": "16.14", "accuracy": "42.895", "wer": "56.999", "wps": "78", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "14400", "lr": "0.000258668", "gnorm": "3.187", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "31571"}
-[2024-06-08 01:36:57,477][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "4.032", "ntokens": "127.65", "acc_total": "127.65", "n_correct": "54.425", "wer_total": "127.65", "n_error": "73.09", "ppl": "16.35", "accuracy": "42.636", "wer": "57.258", "wps": "77.8", "ups": "0.61", "wpb": "127.7", "bsz": "8", "num_updates": "14600", "lr": "0.000251034", "gnorm": "3.159", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "31899"}
-[2024-06-08 01:42:25,727][train_inner][INFO] - {"epoch": 2, "update": 1.17, "loss": "4.05", "ntokens": "128.675", "acc_total": "128.675", "n_correct": "54.86", "wer_total": "128.675", "n_error": "73.7", "ppl": "16.56", "accuracy": "42.635", "wer": "57.276", "wps": "78.4", "ups": "0.61", "wpb": "128.7", "bsz": "8", "num_updates": "14800", "lr": "0.000243626", "gnorm": "3.174", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "32227"}
-[2024-06-08 01:47:53,909][train_inner][INFO] - {"epoch": 2, "update": 1.186, "loss": "3.922", "ntokens": "128.545", "acc_total": "128.545", "n_correct": "55.795", "wer_total": "128.545", "n_error": "72.645", "ppl": "15.16", "accuracy": "43.405", "wer": "56.513", "wps": "78.3", "ups": "0.61", "wpb": "128.5", "bsz": "8", "num_updates": "15000", "lr": "0.000236435", "gnorm": "3.126", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "32556"}
-[2024-06-08 01:47:53,909][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 02:09:58,074][valid][INFO] - {"epoch": 2, "valid_loss": "4.14", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "6.45376", "valid_wer_total": "15.5723", "valid_n_error": "9.10061", "valid_ppl": "17.63", "valid_accuracy": "41.444", "valid_wer": "58.441", "valid_wps": "148.9", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "41.444"}
-[2024-06-08 02:09:58,075][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 15000 updates
-[2024-06-08 02:09:58,075][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_15000.pt
-[2024-06-08 02:10:01,201][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_15000.pt
-[2024-06-08 02:10:05,312][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_15000.pt (epoch 2 @ 15000 updates, score 41.444) (writing took 7.237201154988725 seconds)
-[2024-06-08 02:15:33,166][train_inner][INFO] - {"epoch": 2, "update": 1.202, "loss": "3.98", "ntokens": "127.225", "acc_total": "127.225", "n_correct": "54.9", "wer_total": "127.225", "n_error": "72.195", "ppl": "15.78", "accuracy": "43.152", "wer": "56.746", "wps": "15.3", "ups": "0.12", "wpb": "127.2", "bsz": "8", "num_updates": "15200", "lr": "0.000229457", "gnorm": "3.215", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "34215"}
-[2024-06-08 02:21:01,374][train_inner][INFO] - {"epoch": 2, "update": 1.218, "loss": "3.942", "ntokens": "128.9", "acc_total": "128.9", "n_correct": "56.03", "wer_total": "128.9", "n_error": "72.755", "ppl": "15.37", "accuracy": "43.468", "wer": "56.443", "wps": "78.5", "ups": "0.61", "wpb": "128.9", "bsz": "8", "num_updates": "15400", "lr": "0.000222685", "gnorm": "3.234", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "34543"}
-[2024-06-08 02:21:47,234][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 02:26:30,814][train_inner][INFO] - {"epoch": 2, "update": 1.234, "loss": "4.008", "ntokens": "129.335", "acc_total": "129.335", "n_correct": "55.445", "wer_total": "129.335", "n_error": "73.775", "ppl": "16.09", "accuracy": "42.869", "wer": "57.042", "wps": "78.5", "ups": "0.61", "wpb": "129.3", "bsz": "8", "num_updates": "15600", "lr": "0.000216113", "gnorm": "3.146", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "34872"}
-[2024-06-08 02:31:58,828][train_inner][INFO] - {"epoch": 2, "update": 1.249, "loss": "3.913", "ntokens": "128.445", "acc_total": "128.445", "n_correct": "56.4", "wer_total": "128.445", "n_error": "71.92", "ppl": "15.06", "accuracy": "43.91", "wer": "55.993", "wps": "78.3", "ups": "0.61", "wpb": "128.4", "bsz": "8", "num_updates": "15800", "lr": "0.000209735", "gnorm": "3.16", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "35200"}
-[2024-06-08 02:37:26,890][train_inner][INFO] - {"epoch": 2, "update": 1.265, "loss": "3.906", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "55.405", "wer_total": "127.64", "n_error": "72.12", "ppl": "14.99", "accuracy": "43.407", "wer": "56.503", "wps": "77.8", "ups": "0.61", "wpb": "127.6", "bsz": "8", "num_updates": "16000", "lr": "0.000203545", "gnorm": "3.167", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "35528"}
-[2024-06-08 02:42:54,907][train_inner][INFO] - {"epoch": 2, "update": 1.281, "loss": "3.963", "ntokens": "128.965", "acc_total": "128.965", "n_correct": "56.18", "wer_total": "128.965", "n_error": "72.7", "ppl": "15.59", "accuracy": "43.562", "wer": "56.372", "wps": "78.6", "ups": "0.61", "wpb": "129", "bsz": "8", "num_updates": "16200", "lr": "0.000197538", "gnorm": "3.156", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "35857"}
-[2024-06-08 02:48:23,081][train_inner][INFO] - {"epoch": 2, "update": 1.297, "loss": "3.907", "ntokens": "128.63", "acc_total": "128.63", "n_correct": "56", "wer_total": "128.63", "n_error": "72.455", "ppl": "15", "accuracy": "43.536", "wer": "56.328", "wps": "78.4", "ups": "0.61", "wpb": "128.6", "bsz": "8", "num_updates": "16400", "lr": "0.000191708", "gnorm": "3.118", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "36185"}
-[2024-06-08 02:53:51,290][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "3.938", "ntokens": "127.325", "acc_total": "127.325", "n_correct": "55.515", "wer_total": "127.325", "n_error": "71.665", "ppl": "15.32", "accuracy": "43.601", "wer": "56.285", "wps": "77.6", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "16600", "lr": "0.00018605", "gnorm": "3.179", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "36513"}
-[2024-06-08 02:59:19,569][train_inner][INFO] - {"epoch": 2, "update": 1.329, "loss": "3.852", "ntokens": "128.69", "acc_total": "128.69", "n_correct": "57.14", "wer_total": "128.69", "n_error": "71.44", "ppl": "14.44", "accuracy": "44.401", "wer": "55.513", "wps": "78.4", "ups": "0.61", "wpb": "128.7", "bsz": "8", "num_updates": "16800", "lr": "0.000180559", "gnorm": "3.179", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "36841"}
-[2024-06-08 03:04:47,981][train_inner][INFO] - {"epoch": 2, "update": 1.344, "loss": "3.813", "ntokens": "127.87", "acc_total": "127.87", "n_correct": "57.3", "wer_total": "127.87", "n_error": "70.435", "ppl": "14.05", "accuracy": "44.811", "wer": "55.083", "wps": "77.9", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "17000", "lr": "0.00017523", "gnorm": "3.186", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "37170"}
-[2024-06-08 03:10:16,740][train_inner][INFO] - {"epoch": 2, "update": 1.36, "loss": "3.855", "ntokens": "127.83", "acc_total": "127.83", "n_correct": "56.775", "wer_total": "127.83", "n_error": "70.94", "ppl": "14.47", "accuracy": "44.414", "wer": "55.496", "wps": "77.8", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "17200", "lr": "0.000170059", "gnorm": "3.157", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "37498"}
-[2024-06-08 03:15:45,501][train_inner][INFO] - {"epoch": 2, "update": 1.376, "loss": "3.922", "ntokens": "128.78", "acc_total": "128.78", "n_correct": "55.965", "wer_total": "128.78", "n_error": "72.72", "ppl": "15.16", "accuracy": "43.458", "wer": "56.468", "wps": "78.3", "ups": "0.61", "wpb": "128.8", "bsz": "8", "num_updates": "17400", "lr": "0.00016504", "gnorm": "3.127", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "37827"}
-[2024-06-08 03:18:29,789][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 03:40:35,704][valid][INFO] - {"epoch": 2, "valid_loss": "3.942", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "6.79713", "valid_wer_total": "15.5723", "valid_n_error": "8.75961", "valid_ppl": "15.37", "valid_accuracy": "43.649", "valid_wer": "56.251", "valid_wps": "148.7", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "17500", "valid_best_accuracy": "43.649"}
-[2024-06-08 03:40:35,705][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 17500 updates
-[2024-06-08 03:40:35,705][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_17500.pt
-[2024-06-08 03:40:38,893][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_17500.pt
-[2024-06-08 03:40:43,053][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_17500.pt (epoch 2 @ 17500 updates, score 43.649) (writing took 7.3483391019981354 seconds)
-[2024-06-08 03:43:26,817][train_inner][INFO] - {"epoch": 2, "update": 1.392, "loss": "3.802", "ntokens": "128.515", "acc_total": "128.515", "n_correct": "57.645", "wer_total": "128.515", "n_error": "70.73", "ppl": "13.95", "accuracy": "44.855", "wer": "55.036", "wps": "15.5", "ups": "0.12", "wpb": "128.5", "bsz": "8", "num_updates": "17600", "lr": "0.000160169", "gnorm": "3.071", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "39488"}
-[2024-06-08 03:48:55,286][train_inner][INFO] - {"epoch": 2, "update": 1.408, "loss": "3.862", "ntokens": "128.23", "acc_total": "128.23", "n_correct": "57.015", "wer_total": "128.23", "n_error": "71.16", "ppl": "14.54", "accuracy": "44.463", "wer": "55.494", "wps": "78.1", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "17800", "lr": "0.000155442", "gnorm": "3.093", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "39817"}
-[2024-06-08 03:54:23,275][train_inner][INFO] - {"epoch": 2, "update": 1.423, "loss": "3.824", "ntokens": "127.805", "acc_total": "127.805", "n_correct": "57.03", "wer_total": "127.805", "n_error": "70.65", "ppl": "14.17", "accuracy": "44.623", "wer": "55.28", "wps": "77.9", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "18000", "lr": "0.000150854", "gnorm": "3.134", "loss_scale": "4096", "train_wall": "327", "gb_free": "7.1", "wall": "40145"}
-[2024-06-08 03:58:48,952][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 04:00:28,876][train_inner][INFO] - {"epoch": 2, "update": 1.439, "loss": "3.692", "ntokens": "127.18", "acc_total": "127.18", "n_correct": "59.425", "wer_total": "127.18", "n_error": "67.635", "ppl": "12.93", "accuracy": "46.725", "wer": "53.181", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "18200", "lr": "0.000146402", "gnorm": "4.047", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "40510"}
-[2024-06-08 04:06:32,775][train_inner][INFO] - {"epoch": 2, "update": 1.455, "loss": "3.624", "ntokens": "128.1", "acc_total": "128.1", "n_correct": "60.03", "wer_total": "128.1", "n_error": "67.95", "ppl": "12.33", "accuracy": "46.862", "wer": "53.044", "wps": "70.4", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "18400", "lr": "0.000142081", "gnorm": "3.975", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "40874"}
-[2024-06-08 04:12:36,401][train_inner][INFO] - {"epoch": 2, "update": 1.471, "loss": "3.602", "ntokens": "127.705", "acc_total": "127.705", "n_correct": "60.175", "wer_total": "127.705", "n_error": "67.395", "ppl": "12.14", "accuracy": "47.12", "wer": "52.774", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "18600", "lr": "0.000137888", "gnorm": "3.853", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "41238"}
-[2024-06-08 04:18:40,235][train_inner][INFO] - {"epoch": 2, "update": 1.487, "loss": "3.659", "ntokens": "127.4", "acc_total": "127.4", "n_correct": "59.465", "wer_total": "127.4", "n_error": "67.865", "ppl": "12.63", "accuracy": "46.676", "wer": "53.269", "wps": "70", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "18800", "lr": "0.000133819", "gnorm": "3.856", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "41602"}
-[2024-06-08 04:24:43,909][train_inner][INFO] - {"epoch": 2, "update": 1.503, "loss": "3.606", "ntokens": "127.87", "acc_total": "127.87", "n_correct": "60.7", "wer_total": "127.87", "n_error": "67.065", "ppl": "12.18", "accuracy": "47.47", "wer": "52.448", "wps": "70.3", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "19000", "lr": "0.000129869", "gnorm": "3.808", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "41966"}
-[2024-06-08 04:30:47,827][train_inner][INFO] - {"epoch": 2, "update": 1.518, "loss": "3.56", "ntokens": "127.31", "acc_total": "127.31", "n_correct": "61.015", "wer_total": "127.31", "n_error": "66.2", "ppl": "11.79", "accuracy": "47.926", "wer": "51.999", "wps": "70", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "19200", "lr": "0.000126036", "gnorm": "3.867", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "42329"}
-[2024-06-08 04:36:51,569][train_inner][INFO] - {"epoch": 2, "update": 1.534, "loss": "3.462", "ntokens": "128.845", "acc_total": "128.845", "n_correct": "62.685", "wer_total": "128.845", "n_error": "66.08", "ppl": "11.02", "accuracy": "48.651", "wer": "51.286", "wps": "70.8", "ups": "0.55", "wpb": "128.8", "bsz": "8", "num_updates": "19400", "lr": "0.000122317", "gnorm": "3.767", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "42693"}
-[2024-06-08 04:42:55,048][train_inner][INFO] - {"epoch": 2, "update": 1.55, "loss": "3.456", "ntokens": "128.305", "acc_total": "128.305", "n_correct": "63.275", "wer_total": "128.305", "n_error": "64.965", "ppl": "10.97", "accuracy": "49.316", "wer": "50.633", "wps": "70.6", "ups": "0.55", "wpb": "128.3", "bsz": "8", "num_updates": "19600", "lr": "0.000118707", "gnorm": "3.749", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "43057"}
-[2024-06-08 04:48:58,749][train_inner][INFO] - {"epoch": 2, "update": 1.566, "loss": "3.489", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "62.38", "wer_total": "127.465", "n_error": "64.975", "ppl": "11.23", "accuracy": "48.939", "wer": "50.975", "wps": "70.1", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "19800", "lr": "0.000115203", "gnorm": "3.803", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "43420"}
-[2024-06-08 04:55:02,496][train_inner][INFO] - {"epoch": 2, "update": 1.582, "loss": "3.454", "ntokens": "128.485", "acc_total": "128.485", "n_correct": "63.19", "wer_total": "128.485", "n_error": "65.23", "ppl": "10.96", "accuracy": "49.181", "wer": "50.769", "wps": "70.6", "ups": "0.55", "wpb": "128.5", "bsz": "8", "num_updates": "20000", "lr": "0.000111803", "gnorm": "3.806", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "43784"}
-[2024-06-08 04:55:02,496][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 05:17:04,643][valid][INFO] - {"epoch": 2, "valid_loss": "3.575", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "7.42446", "valid_wer_total": "15.5723", "valid_n_error": "8.13275", "valid_ppl": "11.92", "valid_accuracy": "47.677", "valid_wer": "52.226", "valid_wps": "149.1", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "20000", "valid_best_accuracy": "47.677"}
-[2024-06-08 05:17:04,644][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 20000 updates
-[2024-06-08 05:17:04,644][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_20000.pt
-[2024-06-08 05:17:07,812][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_20000.pt
-[2024-06-08 05:17:11,916][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_20000.pt (epoch 2 @ 20000 updates, score 47.677) (writing took 7.272439671040047 seconds)
-[2024-06-08 05:23:15,446][train_inner][INFO] - {"epoch": 2, "update": 1.597, "loss": "3.449", "ntokens": "127.675", "acc_total": "127.675", "n_correct": "63.18", "wer_total": "127.675", "n_error": "64.38", "ppl": "10.92", "accuracy": "49.485", "wer": "50.425", "wps": "15.1", "ups": "0.12", "wpb": "127.7", "bsz": "8", "num_updates": "20200", "lr": "0.000108504", "gnorm": "3.786", "loss_scale": "4096", "train_wall": "363", "gb_free": "6.5", "wall": "45477"}
-[2024-06-08 05:24:31,708][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 05:29:20,846][train_inner][INFO] - {"epoch": 2, "update": 1.613, "loss": "3.422", "ntokens": "127.81", "acc_total": "127.81", "n_correct": "63.695", "wer_total": "127.81", "n_error": "64.06", "ppl": "10.72", "accuracy": "49.836", "wer": "50.121", "wps": "70", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "20400", "lr": "0.000105301", "gnorm": "3.93", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "45842"}
-[2024-06-08 05:35:24,566][train_inner][INFO] - {"epoch": 2, "update": 1.629, "loss": "3.321", "ntokens": "127.675", "acc_total": "127.675", "n_correct": "65.2", "wer_total": "127.675", "n_error": "62.385", "ppl": "10", "accuracy": "51.067", "wer": "48.862", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "20600", "lr": "0.000102194", "gnorm": "3.778", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "46206"}
-[2024-06-08 05:41:28,540][train_inner][INFO] - {"epoch": 2, "update": 1.645, "loss": "3.35", "ntokens": "128.22", "acc_total": "128.22", "n_correct": "65.52", "wer_total": "128.22", "n_error": "62.62", "ppl": "10.2", "accuracy": "51.1", "wer": "48.838", "wps": "70.5", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "20800", "lr": "9.91776e-05", "gnorm": "3.761", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "46570"}
-[2024-06-08 05:47:32,412][train_inner][INFO] - {"epoch": 2, "update": 1.661, "loss": "3.344", "ntokens": "128.145", "acc_total": "128.145", "n_correct": "65.24", "wer_total": "128.145", "n_error": "62.815", "ppl": "10.15", "accuracy": "50.911", "wer": "49.019", "wps": "70.4", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "21000", "lr": "9.62506e-05", "gnorm": "3.803", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "46934"}
-[2024-06-08 05:53:36,330][train_inner][INFO] - {"epoch": 2, "update": 1.677, "loss": "3.371", "ntokens": "127.55", "acc_total": "127.55", "n_correct": "64.23", "wer_total": "127.55", "n_error": "63.265", "ppl": "10.35", "accuracy": "50.357", "wer": "49.6", "wps": "70.1", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "21200", "lr": "9.341e-05", "gnorm": "3.751", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "47298"}
-[2024-06-08 05:59:40,049][train_inner][INFO] - {"epoch": 2, "update": 1.692, "loss": "3.374", "ntokens": "128.52", "acc_total": "128.52", "n_correct": "64.63", "wer_total": "128.52", "n_error": "63.795", "ppl": "10.37", "accuracy": "50.288", "wer": "49.638", "wps": "70.7", "ups": "0.55", "wpb": "128.5", "bsz": "8", "num_updates": "21400", "lr": "9.06532e-05", "gnorm": "3.767", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "47662"}
-[2024-06-08 06:05:43,769][train_inner][INFO] - {"epoch": 2, "update": 1.708, "loss": "3.353", "ntokens": "127.71", "acc_total": "127.71", "n_correct": "64.57", "wer_total": "127.71", "n_error": "63.06", "ppl": "10.22", "accuracy": "50.56", "wer": "49.377", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "21600", "lr": "8.79777e-05", "gnorm": "3.813", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "48025"}
-[2024-06-08 06:11:47,487][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "3.197", "ntokens": "128.015", "acc_total": "128.015", "n_correct": "66.62", "wer_total": "128.015", "n_error": "61.31", "ppl": "9.17", "accuracy": "52.041", "wer": "47.893", "wps": "70.4", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "21800", "lr": "8.53812e-05", "gnorm": "3.685", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "48389"}
-[2024-06-08 06:17:51,092][train_inner][INFO] - {"epoch": 2, "update": 1.74, "loss": "3.27", "ntokens": "128.89", "acc_total": "128.89", "n_correct": "66.275", "wer_total": "128.89", "n_error": "62.535", "ppl": "9.65", "accuracy": "51.42", "wer": "48.518", "wps": "70.9", "ups": "0.55", "wpb": "128.9", "bsz": "8", "num_updates": "22000", "lr": "8.28614e-05", "gnorm": "3.707", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "48753"}
-[2024-06-08 06:23:54,811][train_inner][INFO] - {"epoch": 2, "update": 1.756, "loss": "3.292", "ntokens": "127.725", "acc_total": "127.725", "n_correct": "65.13", "wer_total": "127.725", "n_error": "62.495", "ppl": "9.8", "accuracy": "50.992", "wer": "48.929", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "22200", "lr": "8.04159e-05", "gnorm": "3.71", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "49116"}
-[2024-06-08 06:29:58,526][train_inner][INFO] - {"epoch": 2, "update": 1.771, "loss": "3.315", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "64.505", "wer_total": "127.06", "n_error": "62.465", "ppl": "9.95", "accuracy": "50.767", "wer": "49.162", "wps": "69.9", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "22400", "lr": "7.80425e-05", "gnorm": "3.765", "loss_scale": "4096", "train_wall": "363", "gb_free": "6.5", "wall": "49480"}
-[2024-06-08 06:33:00,498][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 06:55:05,814][valid][INFO] - {"epoch": 2, "valid_loss": "3.381", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "7.7762", "valid_wer_total": "15.5723", "valid_n_error": "7.78394", "valid_ppl": "10.42", "valid_accuracy": "49.936", "valid_wer": "49.986", "valid_wps": "148.8", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "22500", "valid_best_accuracy": "49.936"}
-[2024-06-08 06:55:05,815][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 22500 updates
-[2024-06-08 06:55:05,815][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_22500.pt
-[2024-06-08 06:55:08,992][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_22500.pt
-[2024-06-08 06:55:13,155][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_22500.pt (epoch 2 @ 22500 updates, score 49.936) (writing took 7.3407288460293785 seconds)
-[2024-06-08 06:57:12,851][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 06:58:16,521][train_inner][INFO] - {"epoch": 2, "update": 1.787, "loss": "3.251", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "65.625", "wer_total": "127.425", "n_error": "61.725", "ppl": "9.52", "accuracy": "51.501", "wer": "48.44", "wps": "15", "ups": "0.12", "wpb": "127.4", "bsz": "8", "num_updates": "22600", "lr": "7.57393e-05", "gnorm": "3.792", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "51178"}
-[2024-06-08 07:04:20,399][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "3.26", "ntokens": "128.3", "acc_total": "128.3", "n_correct": "66.075", "wer_total": "128.3", "n_error": "62.135", "ppl": "9.58", "accuracy": "51.5", "wer": "48.429", "wps": "70.5", "ups": "0.55", "wpb": "128.3", "bsz": "8", "num_updates": "22800", "lr": "7.3504e-05", "gnorm": "3.73", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "51542"}
-[2024-06-08 07:10:24,177][train_inner][INFO] - {"epoch": 2, "update": 1.819, "loss": "3.259", "ntokens": "127.995", "acc_total": "127.995", "n_correct": "65.94", "wer_total": "127.995", "n_error": "61.99", "ppl": "9.57", "accuracy": "51.518", "wer": "48.432", "wps": "70.4", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "23000", "lr": "7.13346e-05", "gnorm": "3.746", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "51906"}
-[2024-06-08 07:16:28,147][train_inner][INFO] - {"epoch": 2, "update": 1.835, "loss": "3.171", "ntokens": "127.705", "acc_total": "127.705", "n_correct": "66.95", "wer_total": "127.705", "n_error": "60.65", "ppl": "9", "accuracy": "52.426", "wer": "47.492", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "23200", "lr": "6.92293e-05", "gnorm": "3.73", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "52270"}
-[2024-06-08 07:22:31,900][train_inner][INFO] - {"epoch": 2, "update": 1.851, "loss": "3.184", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "67.175", "wer_total": "127.485", "n_error": "60.235", "ppl": "9.09", "accuracy": "52.692", "wer": "47.249", "wps": "70.1", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "23400", "lr": "6.71862e-05", "gnorm": "3.749", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "52633"}
-[2024-06-08 07:28:35,678][train_inner][INFO] - {"epoch": 2, "update": 1.866, "loss": "3.184", "ntokens": "128.21", "acc_total": "128.21", "n_correct": "67.35", "wer_total": "128.21", "n_error": "60.795", "ppl": "9.09", "accuracy": "52.531", "wer": "47.418", "wps": "70.5", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "23600", "lr": "6.52033e-05", "gnorm": "3.678", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "52997"}
-[2024-06-08 07:34:39,742][train_inner][INFO] - {"epoch": 2, "update": 1.882, "loss": "3.196", "ntokens": "127.805", "acc_total": "127.805", "n_correct": "67.01", "wer_total": "127.805", "n_error": "60.73", "ppl": "9.16", "accuracy": "52.431", "wer": "47.518", "wps": "70.2", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "23800", "lr": "6.3279e-05", "gnorm": "3.759", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "53361"}
-[2024-06-08 07:40:43,495][train_inner][INFO] - {"epoch": 2, "update": 1.898, "loss": "3.216", "ntokens": "126.91", "acc_total": "126.91", "n_correct": "66.305", "wer_total": "126.91", "n_error": "60.515", "ppl": "9.29", "accuracy": "52.246", "wer": "47.683", "wps": "69.8", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "24000", "lr": "6.14114e-05", "gnorm": "3.759", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "53725"}
-[2024-06-08 07:46:47,398][train_inner][INFO] - {"epoch": 2, "update": 1.914, "loss": "3.186", "ntokens": "127.95", "acc_total": "127.95", "n_correct": "67.215", "wer_total": "127.95", "n_error": "60.67", "ppl": "9.1", "accuracy": "52.532", "wer": "47.417", "wps": "70.3", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "24200", "lr": "5.9599e-05", "gnorm": "3.798", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "54089"}
-[2024-06-08 07:52:50,970][train_inner][INFO] - {"epoch": 2, "update": 1.93, "loss": "3.105", "ntokens": "128.93", "acc_total": "128.93", "n_correct": "68.45", "wer_total": "128.93", "n_error": "60.415", "ppl": "8.6", "accuracy": "53.091", "wer": "46.859", "wps": "70.9", "ups": "0.55", "wpb": "128.9", "bsz": "8", "num_updates": "24400", "lr": "5.784e-05", "gnorm": "3.82", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "54453"}
-[2024-06-08 07:58:54,765][train_inner][INFO] - {"epoch": 2, "update": 1.945, "loss": "3.143", "ntokens": "128.005", "acc_total": "128.005", "n_correct": "67.62", "wer_total": "128.005", "n_error": "60.34", "ppl": "8.83", "accuracy": "52.826", "wer": "47.139", "wps": "70.4", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "24600", "lr": "5.6133e-05", "gnorm": "3.809", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "54816"}
-[2024-06-08 08:04:42,077][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 08:05:00,273][train_inner][INFO] - {"epoch": 2, "update": 1.961, "loss": "3.121", "ntokens": "128.395", "acc_total": "128.395", "n_correct": "68.12", "wer_total": "128.395", "n_error": "60.22", "ppl": "8.7", "accuracy": "53.055", "wer": "46.902", "wps": "70.3", "ups": "0.55", "wpb": "128.4", "bsz": "8", "num_updates": "24800", "lr": "5.44763e-05", "gnorm": "3.753", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "55182"}
-[2024-06-08 08:07:03,876][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
-[2024-06-08 08:11:05,696][train_inner][INFO] - {"epoch": 2, "update": 1.977, "loss": "3.143", "ntokens": "127.795", "acc_total": "127.795", "n_correct": "67.8", "wer_total": "127.795", "n_error": "59.9", "ppl": "8.83", "accuracy": "53.054", "wer": "46.872", "wps": "69.9", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "25000", "lr": "5.28686e-05", "gnorm": "3.809", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "55547"}
-[2024-06-08 08:11:05,696][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 08:33:09,597][valid][INFO] - {"epoch": 2, "valid_loss": "3.277", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "7.98026", "valid_wer_total": "15.5723", "valid_n_error": "7.58075", "valid_ppl": "9.69", "valid_accuracy": "51.247", "valid_wer": "48.681", "valid_wps": "149", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "25000", "valid_best_accuracy": "51.247"}
-[2024-06-08 08:33:09,598][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25000 updates
-[2024-06-08 08:33:09,598][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_25000.pt
-[2024-06-08 08:33:12,765][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_25000.pt
-[2024-06-08 08:33:18,037][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_25000.pt (epoch 2 @ 25000 updates, score 51.247) (writing took 8.43882818496786 seconds)
-[2024-06-08 08:39:21,769][train_inner][INFO] - {"epoch": 2, "update": 1.993, "loss": "3.093", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "68.37", "wer_total": "127.235", "n_error": "58.83", "ppl": "8.53", "accuracy": "53.735", "wer": "46.237", "wps": "15", "ups": "0.12", "wpb": "127.2", "bsz": "8", "num_updates": "25200", "lr": "5.13083e-05", "gnorm": "3.834", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "57243"}
-[2024-06-08 08:42:01,108][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 09:04:06,614][valid][INFO] - {"epoch": 2, "valid_loss": "3.249", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "8.01879", "valid_wer_total": "15.5723", "valid_n_error": "7.543", "valid_ppl": "9.51", "valid_accuracy": "51.494", "valid_wer": "48.439", "valid_wps": "148.8", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "25288", "valid_best_accuracy": "51.494"}
-[2024-06-08 09:04:06,615][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25288 updates
-[2024-06-08 09:04:06,615][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_best.pt
-[2024-06-08 09:04:10,430][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_best.pt
-[2024-06-08 09:04:12,564][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 2 @ 25288 updates, score 51.494) (writing took 5.948897480033338 seconds)
-[2024-06-08 09:04:12,564][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below)
-[2024-06-08 09:04:12,599][train][INFO] - {"epoch": 2, "train_loss": "3.604", "train_ntokens": "128.045", "train_acc_total": "128.045", "train_n_correct": "60.7988", "train_wer_total": "128.045", "train_n_error": "67.145", "train_ppl": "12.16", "train_accuracy": "47.483", "train_wer": "52.439", "train_wps": "53.9", "train_ups": "0.42", "train_wpb": "128", "train_bsz": "8", "train_num_updates": "25288", "train_lr": "5.06364e-05", "train_gnorm": "3.525", "train_loss_scale": "1024", "train_train_wall": "22002", "train_gb_free": "6.5", "train_wall": "58734"}
-[2024-06-08 09:04:12,644][fairseq.trainer][INFO] - begin training epoch 3
-[2024-06-08 09:04:12,645][fairseq_cli.train][INFO] - Start iterating over samples
-[2024-06-08 09:07:36,335][train_inner][INFO] - {"epoch": 3, "update": 2.009, "loss": "2.945", "ntokens": "128.785", "acc_total": "128.785", "n_correct": "70.53", "wer_total": "128.785", "n_error": "58.2", "ppl": "7.7", "accuracy": "54.766", "wer": "45.192", "wps": "15.2", "ups": "0.12", "wpb": "128.8", "bsz": "8", "num_updates": "25400", "lr": "4.9794e-05", "gnorm": "3.721", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "58938"}
-[2024-06-08 09:13:40,631][train_inner][INFO] - {"epoch": 3, "update": 2.025, "loss": "2.833", "ntokens": "127.715", "acc_total": "127.715", "n_correct": "71.94", "wer_total": "127.715", "n_error": "55.735", "ppl": "7.13", "accuracy": "56.329", "wer": "43.64", "wps": "70.1", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "25600", "lr": "4.83244e-05", "gnorm": "3.686", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "59302"}
-[2024-06-08 09:19:44,463][train_inner][INFO] - {"epoch": 3, "update": 2.04, "loss": "2.815", "ntokens": "127.975", "acc_total": "127.975", "n_correct": "72.4", "wer_total": "127.975", "n_error": "55.52", "ppl": "7.04", "accuracy": "56.574", "wer": "43.383", "wps": "70.3", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "25800", "lr": "4.68982e-05", "gnorm": "3.788", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "59666"}
-[2024-06-08 09:25:48,573][train_inner][INFO] - {"epoch": 3, "update": 2.056, "loss": "2.828", "ntokens": "128.025", "acc_total": "128.025", "n_correct": "71.94", "wer_total": "128.025", "n_error": "56.04", "ppl": "7.1", "accuracy": "56.192", "wer": "43.773", "wps": "70.3", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "26000", "lr": "4.55141e-05", "gnorm": "3.725", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "60030"}
-[2024-06-08 09:31:52,864][train_inner][INFO] - {"epoch": 3, "update": 2.072, "loss": "2.871", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "71.51", "wer_total": "126.97", "n_error": "55.415", "ppl": "7.32", "accuracy": "56.32", "wer": "43.644", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "26200", "lr": "4.41708e-05", "gnorm": "3.741", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "60394"}
-[2024-06-08 09:37:57,088][train_inner][INFO] - {"epoch": 3, "update": 2.088, "loss": "2.733", "ntokens": "127.47", "acc_total": "127.47", "n_correct": "73.125", "wer_total": "127.47", "n_error": "54.27", "ppl": "6.65", "accuracy": "57.366", "wer": "42.575", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "26400", "lr": "4.28672e-05", "gnorm": "3.658", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "60759"}
-[2024-06-08 09:44:01,176][train_inner][INFO] - {"epoch": 3, "update": 2.104, "loss": "2.743", "ntokens": "128.05", "acc_total": "128.05", "n_correct": "73.17", "wer_total": "128.05", "n_error": "54.855", "ppl": "6.7", "accuracy": "57.142", "wer": "42.839", "wps": "70.3", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "26600", "lr": "4.16021e-05", "gnorm": "3.743", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "61123"}
-[2024-06-08 09:50:05,331][train_inner][INFO] - {"epoch": 3, "update": 2.12, "loss": "2.809", "ntokens": "127.38", "acc_total": "127.38", "n_correct": "72.125", "wer_total": "127.38", "n_error": "55.205", "ppl": "7.01", "accuracy": "56.622", "wer": "43.339", "wps": "70", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "26800", "lr": "4.03743e-05", "gnorm": "3.767", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "61487"}
-[2024-06-08 09:56:09,541][train_inner][INFO] - {"epoch": 3, "update": 2.135, "loss": "2.801", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "71.855", "wer_total": "126.82", "n_error": "54.92", "ppl": "6.97", "accuracy": "56.659", "wer": "43.305", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "27000", "lr": "3.91827e-05", "gnorm": "3.772", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "61851"}
-[2024-06-08 10:02:13,612][train_inner][INFO] - {"epoch": 3, "update": 2.151, "loss": "2.729", "ntokens": "128.005", "acc_total": "128.005", "n_correct": "73.46", "wer_total": "128.005", "n_error": "54.475", "ppl": "6.63", "accuracy": "57.388", "wer": "42.557", "wps": "70.3", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "27200", "lr": "3.80263e-05", "gnorm": "3.816", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "62215"}
-[2024-06-08 10:08:17,870][train_inner][INFO] - {"epoch": 3, "update": 2.167, "loss": "2.799", "ntokens": "127.945", "acc_total": "127.945", "n_correct": "72.465", "wer_total": "127.945", "n_error": "55.43", "ppl": "6.96", "accuracy": "56.638", "wer": "43.323", "wps": "70.2", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "27400", "lr": "3.6904e-05", "gnorm": "3.831", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "62579"}
-[2024-06-08 10:11:20,060][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 10:33:25,918][valid][INFO] - {"epoch": 3, "valid_loss": "3.232", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "8.07155", "valid_wer_total": "15.5723", "valid_n_error": "7.49112", "valid_ppl": "9.4", "valid_accuracy": "51.833", "valid_wer": "48.105", "valid_wps": "148.7", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "27500", "valid_best_accuracy": "51.833"}
-[2024-06-08 10:33:25,919][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 27500 updates
-[2024-06-08 10:33:25,919][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_3_27500.pt
-[2024-06-08 10:33:29,095][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_3_27500.pt
-[2024-06-08 10:33:33,294][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_3_27500.pt (epoch 3 @ 27500 updates, score 51.833) (writing took 7.375236897962168 seconds)
-[2024-06-08 10:36:35,307][train_inner][INFO] - {"epoch": 3, "update": 2.183, "loss": "2.773", "ntokens": "127.265", "acc_total": "127.265", "n_correct": "72.675", "wer_total": "127.265", "n_error": "54.49", "ppl": "6.84", "accuracy": "57.105", "wer": "42.816", "wps": "15", "ups": "0.12", "wpb": "127.3", "bsz": "8", "num_updates": "27600", "lr": "3.58149e-05", "gnorm": "3.742", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "64277"}
-[2024-06-08 10:42:39,723][train_inner][INFO] - {"epoch": 3, "update": 2.199, "loss": "2.785", "ntokens": "127.93", "acc_total": "127.93", "n_correct": "72.64", "wer_total": "127.93", "n_error": "55.225", "ppl": "6.89", "accuracy": "56.781", "wer": "43.168", "wps": "70.2", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "27800", "lr": "3.47579e-05", "gnorm": "3.784", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "64641"}
-[2024-06-08 10:48:44,065][train_inner][INFO] - {"epoch": 3, "update": 2.214, "loss": "2.787", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "72.82", "wer_total": "127.42", "n_error": "54.55", "ppl": "6.9", "accuracy": "57.15", "wer": "42.811", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "28000", "lr": "3.37321e-05", "gnorm": "3.773", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "65006"}
-[2024-06-08 10:54:48,438][train_inner][INFO] - {"epoch": 3, "update": 2.23, "loss": "2.74", "ntokens": "127.62", "acc_total": "127.62", "n_correct": "73.09", "wer_total": "127.62", "n_error": "54.465", "ppl": "6.68", "accuracy": "57.272", "wer": "42.677", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "28200", "lr": "3.27365e-05", "gnorm": "3.769", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "65370"}
-[2024-06-08 11:00:52,958][train_inner][INFO] - {"epoch": 3, "update": 2.246, "loss": "2.785", "ntokens": "127.63", "acc_total": "127.63", "n_correct": "72.42", "wer_total": "127.63", "n_error": "55.155", "ppl": "6.89", "accuracy": "56.742", "wer": "43.215", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "28400", "lr": "3.17704e-05", "gnorm": "3.792", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "65735"}
-[2024-06-08 11:06:57,639][train_inner][INFO] - {"epoch": 3, "update": 2.262, "loss": "2.815", "ntokens": "127.61", "acc_total": "127.61", "n_correct": "72.07", "wer_total": "127.61", "n_error": "55.49", "ppl": "7.03", "accuracy": "56.477", "wer": "43.484", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "28600", "lr": "3.08327e-05", "gnorm": "3.821", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "66099"}
-[2024-06-08 11:13:02,259][train_inner][INFO] - {"epoch": 3, "update": 2.278, "loss": "2.743", "ntokens": "128.135", "acc_total": "128.135", "n_correct": "73.085", "wer_total": "128.135", "n_error": "54.97", "ppl": "6.7", "accuracy": "57.037", "wer": "42.9", "wps": "70.3", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "28800", "lr": "2.99228e-05", "gnorm": "3.737", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "66464"}
-[2024-06-08 11:19:06,933][train_inner][INFO] - {"epoch": 3, "update": 2.293, "loss": "2.797", "ntokens": "128.44", "acc_total": "128.44", "n_correct": "73.06", "wer_total": "128.44", "n_error": "55.32", "ppl": "6.95", "accuracy": "56.883", "wer": "43.071", "wps": "70.4", "ups": "0.55", "wpb": "128.4", "bsz": "8", "num_updates": "29000", "lr": "2.90397e-05", "gnorm": "3.797", "loss_scale": "4096", "train_wall": "364", "gb_free": "6.5", "wall": "66829"}
-[2024-06-08 11:21:16,289][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
-[2024-06-08 11:25:13,266][train_inner][INFO] - {"epoch": 3, "update": 2.309, "loss": "2.76", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "72.56", "wer_total": "127.19", "n_error": "54.56", "ppl": "6.77", "accuracy": "57.049", "wer": "42.896", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "29200", "lr": "2.81826e-05", "gnorm": "3.814", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "67195"}
-[2024-06-08 11:31:17,903][train_inner][INFO] - {"epoch": 3, "update": 2.325, "loss": "2.779", "ntokens": "128.01", "acc_total": "128.01", "n_correct": "72.96", "wer_total": "128.01", "n_error": "55", "ppl": "6.87", "accuracy": "56.996", "wer": "42.965", "wps": "70.2", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "29400", "lr": "2.73509e-05", "gnorm": "3.789", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "67560"}
-[2024-06-08 11:37:22,348][train_inner][INFO] - {"epoch": 3, "update": 2.341, "loss": "2.733", "ntokens": "127.46", "acc_total": "127.46", "n_correct": "73.06", "wer_total": "127.46", "n_error": "54.325", "ppl": "6.65", "accuracy": "57.32", "wer": "42.621", "wps": "69.9", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "29600", "lr": "2.65436e-05", "gnorm": "3.781", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "67924"}
-[2024-06-08 11:43:26,607][train_inner][INFO] - {"epoch": 3, "update": 2.357, "loss": "2.755", "ntokens": "128.71", "acc_total": "128.71", "n_correct": "73.845", "wer_total": "128.71", "n_error": "54.805", "ppl": "6.75", "accuracy": "57.373", "wer": "42.58", "wps": "70.7", "ups": "0.55", "wpb": "128.7", "bsz": "8", "num_updates": "29800", "lr": "2.57603e-05", "gnorm": "3.808", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "68288"}
-[2024-06-08 11:49:31,171][train_inner][INFO] - {"epoch": 3, "update": 2.373, "loss": "2.789", "ntokens": "128.475", "acc_total": "128.475", "n_correct": "73.59", "wer_total": "128.475", "n_error": "54.82", "ppl": "6.91", "accuracy": "57.28", "wer": "42.67", "wps": "70.5", "ups": "0.55", "wpb": "128.5", "bsz": "8", "num_updates": "30000", "lr": "2.5e-05", "gnorm": "3.79", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "68653"}
-[2024-06-08 11:49:31,171][fairseq_cli.train][INFO] - Stopping training due to num_updates: 30000 >= max_update: 30000
-[2024-06-08 11:49:31,171][fairseq_cli.train][INFO] - begin validation on "valid" subset
-[2024-06-08 12:11:38,309][valid][INFO] - {"epoch": 3, "valid_loss": "3.195", "valid_ntokens": "15.5723", "valid_acc_total": "15.5723", "valid_n_correct": "8.1303", "valid_wer_total": "15.5723", "valid_n_error": "7.43315", "valid_ppl": "9.16", "valid_accuracy": "52.21", "valid_wer": "47.733", "valid_wps": "148.6", "valid_wpb": "15.6", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "52.21"}
-[2024-06-08 12:11:38,310][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 30000 updates
-[2024-06-08 12:11:38,310][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_3_30000.pt
-[2024-06-08 12:11:41,489][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_3_30000.pt
-[2024-06-08 12:11:45,787][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_3_30000.pt (epoch 3 @ 30000 updates, score 52.21) (writing took 7.477471122983843 seconds)
-[2024-06-08 12:11:45,829][fairseq_cli.train][INFO] - end of epoch 3 (average epoch stats below)
-[2024-06-08 12:11:45,831][train][INFO] - {"epoch": 3, "train_loss": "2.783", "train_ntokens": "127.772", "train_acc_total": "127.772", "train_n_correct": "72.6942", "train_wer_total": "127.772", "train_n_error": "55.0206", "train_ppl": "6.88", "train_accuracy": "56.893", "train_wer": "43.061", "train_wps": "53.5", "train_ups": "0.42", "train_wpb": "127.8", "train_bsz": "8", "train_num_updates": "30000", "train_lr": "2.5e-05", "train_gnorm": "3.768", "train_loss_scale": "2048", "train_train_wall": "8566", "train_gb_free": "6.5", "train_wall": "69987"}
-[2024-06-08 12:11:45,831][fairseq_cli.train][INFO] - done training in 69986.8 seconds
+[2024-06-15 00:52:01,428][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
+[2024-06-15 00:52:01,428][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
+[2024-06-15 00:52:05,871][fairseq.trainer][INFO] - begin training epoch 1
+[2024-06-15 00:52:05,871][fairseq_cli.train][INFO] - Start iterating over samples
+[2024-06-15 00:57:31,626][train_inner][INFO] - {"epoch": 1, "update": 0.013, "loss": "7.653", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "17.625", "wer_total": "126.725", "n_error": "109.03", "ppl": "201.3", "accuracy": "13.908", "wer": "86.037", "wps": "77.9", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "200", "lr": "1.49e-05", "gnorm": "8.816", "loss_scale": "128", "train_wall": "325", "gb_free": "7.1", "wall": "331"}
+[2024-06-15 01:02:59,048][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "6.228", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "25.55", "wer_total": "126.93", "n_error": "101.22", "ppl": "74.97", "accuracy": "20.129", "wer": "79.745", "wps": "77.5", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "400", "lr": "2.48e-05", "gnorm": "3.659", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "658"}
+[2024-06-15 01:08:26,785][train_inner][INFO] - {"epoch": 1, "update": 0.04, "loss": "6.1", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "28.08", "wer_total": "127.015", "n_error": "98.67", "ppl": "68.61", "accuracy": "22.108", "wer": "77.684", "wps": "77.5", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "600", "lr": "3.47e-05", "gnorm": "3.642", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "986"}
+[2024-06-15 01:13:54,381][train_inner][INFO] - {"epoch": 1, "update": 0.053, "loss": "5.911", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "29.77", "wer_total": "126.865", "n_error": "96.86", "ppl": "60.18", "accuracy": "23.466", "wer": "76.349", "wps": "77.5", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "800", "lr": "4.46e-05", "gnorm": "3.675", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "1313"}
+[2024-06-15 01:19:21,223][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "5.986", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "29.68", "wer_total": "127.025", "n_error": "97.1", "ppl": "63.4", "accuracy": "23.365", "wer": "76.442", "wps": "77.7", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "1000", "lr": "5.45e-05", "gnorm": "3.383", "loss_scale": "128", "train_wall": "326", "gb_free": "7.1", "wall": "1640"}
+[2024-06-15 01:24:48,144][train_inner][INFO] - {"epoch": 1, "update": 0.08, "loss": "5.935", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "30", "wer_total": "127.095", "n_error": "96.86", "ppl": "61.2", "accuracy": "23.604", "wer": "76.211", "wps": "77.8", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "1200", "lr": "6.44e-05", "gnorm": "3.179", "loss_scale": "128", "train_wall": "326", "gb_free": "7.1", "wall": "1967"}
+[2024-06-15 01:30:15,106][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "5.786", "ntokens": "127.62", "acc_total": "127.62", "n_correct": "31.32", "wer_total": "127.62", "n_error": "96.11", "ppl": "55.18", "accuracy": "24.542", "wer": "75.31", "wps": "78.1", "ups": "0.61", "wpb": "127.6", "bsz": "8", "num_updates": "1400", "lr": "7.43e-05", "gnorm": "2.971", "loss_scale": "128", "train_wall": "326", "gb_free": "7.1", "wall": "2294"}
+[2024-06-15 01:35:42,869][train_inner][INFO] - {"epoch": 1, "update": 0.106, "loss": "5.816", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "31.21", "wer_total": "127.41", "n_error": "95.89", "ppl": "56.32", "accuracy": "24.496", "wer": "75.261", "wps": "77.7", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "1600", "lr": "8.42e-05", "gnorm": "2.741", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "2622"}
+[2024-06-15 01:41:10,609][train_inner][INFO] - {"epoch": 1, "update": 0.119, "loss": "5.874", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "30.68", "wer_total": "126.56", "n_error": "95.71", "ppl": "58.64", "accuracy": "24.241", "wer": "75.624", "wps": "77.2", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "1800", "lr": "9.41e-05", "gnorm": "2.591", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "2950"}
+[2024-06-15 01:46:38,036][train_inner][INFO] - {"epoch": 1, "update": 0.133, "loss": "5.789", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "31.65", "wer_total": "126.875", "n_error": "95.06", "ppl": "55.3", "accuracy": "24.946", "wer": "74.924", "wps": "77.5", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "2000", "lr": "0.000104", "gnorm": "2.416", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "3277"}
+[2024-06-15 01:52:05,895][train_inner][INFO] - {"epoch": 1, "update": 0.146, "loss": "5.755", "ntokens": "128.2", "acc_total": "128.2", "n_correct": "31.92", "wer_total": "128.2", "n_error": "96", "ppl": "54", "accuracy": "24.899", "wer": "74.883", "wps": "78.2", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "2200", "lr": "0.0001139", "gnorm": "2.249", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "3605"}
+[2024-06-15 01:57:33,897][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "5.749", "ntokens": "127.775", "acc_total": "127.775", "n_correct": "32.21", "wer_total": "127.775", "n_error": "95.35", "ppl": "53.76", "accuracy": "25.208", "wer": "74.623", "wps": "77.9", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "2400", "lr": "0.0001238", "gnorm": "2.097", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "3933"}
+[2024-06-15 02:00:17,920][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 02:42:01,030][valid][INFO] - {"epoch": 1, "valid_loss": "5.673", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "4.66215", "valid_wer_total": "18.1585", "valid_n_error": "13.4569", "valid_ppl": "51.02", "valid_accuracy": "25.675", "valid_wer": "74.108", "valid_wps": "174", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "2500"}
+[2024-06-15 02:42:01,031][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 2500 updates
+[2024-06-15 02:42:01,031][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_2500.pt
+[2024-06-15 02:42:04,118][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_2500.pt
+[2024-06-15 02:42:06,850][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_2500.pt (epoch 1 @ 2500 updates, score 25.675) (writing took 5.819389069001772 seconds)
+[2024-06-15 02:44:50,079][train_inner][INFO] - {"epoch": 1, "update": 0.172, "loss": "5.737", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "31.765", "wer_total": "126.92", "n_error": "95.005", "ppl": "53.34", "accuracy": "25.028", "wer": "74.854", "wps": "9", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "2600", "lr": "0.0001337", "gnorm": "1.986", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "6769"}
+[2024-06-15 02:50:17,543][train_inner][INFO] - {"epoch": 1, "update": 0.186, "loss": "5.77", "ntokens": "125.685", "acc_total": "125.685", "n_correct": "31.445", "wer_total": "125.685", "n_error": "94.045", "ppl": "54.55", "accuracy": "25.019", "wer": "74.826", "wps": "76.8", "ups": "0.61", "wpb": "125.7", "bsz": "8", "num_updates": "2800", "lr": "0.0001436", "gnorm": "1.939", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "7097"}
+[2024-06-15 02:55:44,223][train_inner][INFO] - {"epoch": 1, "update": 0.199, "loss": "5.752", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "32.58", "wer_total": "127.19", "n_error": "94.365", "ppl": "53.89", "accuracy": "25.615", "wer": "74.192", "wps": "77.9", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "3000", "lr": "0.0001535", "gnorm": "1.887", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "7423"}
+[2024-06-15 03:01:11,678][train_inner][INFO] - {"epoch": 1, "update": 0.212, "loss": "5.68", "ntokens": "126.535", "acc_total": "126.535", "n_correct": "32.935", "wer_total": "126.535", "n_error": "93.395", "ppl": "51.27", "accuracy": "26.028", "wer": "73.81", "wps": "77.3", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "3200", "lr": "0.0001634", "gnorm": "1.858", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "7751"}
+[2024-06-15 03:06:38,814][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "5.69", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "32.885", "wer_total": "126.53", "n_error": "93.425", "ppl": "51.62", "accuracy": "25.99", "wer": "73.836", "wps": "77.4", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "3400", "lr": "0.0001733", "gnorm": "1.846", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "8078"}
+[2024-06-15 03:12:06,165][train_inner][INFO] - {"epoch": 1, "update": 0.239, "loss": "5.621", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "33.72", "wer_total": "127.025", "n_error": "93.035", "ppl": "49.21", "accuracy": "26.546", "wer": "73.241", "wps": "77.6", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "3600", "lr": "0.0001832", "gnorm": "1.825", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "8405"}
+[2024-06-15 03:17:33,259][train_inner][INFO] - {"epoch": 1, "update": 0.252, "loss": "5.608", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "34.315", "wer_total": "127.35", "n_error": "92.77", "ppl": "48.78", "accuracy": "26.945", "wer": "72.846", "wps": "77.9", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "3800", "lr": "0.0001931", "gnorm": "1.789", "loss_scale": "256", "train_wall": "326", "gb_free": "7.1", "wall": "8732"}
+[2024-06-15 03:23:00,598][train_inner][INFO] - {"epoch": 1, "update": 0.265, "loss": "5.573", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "34.91", "wer_total": "127.785", "n_error": "92.7", "ppl": "47.62", "accuracy": "27.319", "wer": "72.544", "wps": "78.1", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "4000", "lr": "0.000203", "gnorm": "1.862", "loss_scale": "256", "train_wall": "327", "gb_free": "7.1", "wall": "9060"}
+[2024-06-15 03:28:27,968][train_inner][INFO] - {"epoch": 1, "update": 0.278, "loss": "5.554", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "35.245", "wer_total": "126.255", "n_error": "90.78", "ppl": "46.98", "accuracy": "27.916", "wer": "71.902", "wps": "77.1", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "4200", "lr": "0.0002129", "gnorm": "1.88", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "9387"}
+[2024-06-15 03:33:55,493][train_inner][INFO] - {"epoch": 1, "update": 0.292, "loss": "5.508", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "35.565", "wer_total": "125.9", "n_error": "90.105", "ppl": "45.51", "accuracy": "28.249", "wer": "71.569", "wps": "76.9", "ups": "0.61", "wpb": "125.9", "bsz": "8", "num_updates": "4400", "lr": "0.0002228", "gnorm": "1.905", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "9715"}
+[2024-06-15 03:39:23,422][train_inner][INFO] - {"epoch": 1, "update": 0.305, "loss": "5.413", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "37.035", "wer_total": "127.885", "n_error": "90.61", "ppl": "42.59", "accuracy": "28.96", "wer": "70.853", "wps": "78", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "4600", "lr": "0.0002327", "gnorm": "1.974", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "10043"}
+[2024-06-15 03:44:51,684][train_inner][INFO] - {"epoch": 1, "update": 0.318, "loss": "5.426", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "36.655", "wer_total": "126.21", "n_error": "89.27", "ppl": "42.99", "accuracy": "29.043", "wer": "70.731", "wps": "76.9", "ups": "0.61", "wpb": "126.2", "bsz": "8", "num_updates": "4800", "lr": "0.0002426", "gnorm": "2.073", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "10371"}
+[2024-06-15 03:50:19,993][train_inner][INFO] - {"epoch": 1, "update": 0.331, "loss": "5.367", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "37.21", "wer_total": "126.87", "n_error": "89.44", "ppl": "41.27", "accuracy": "29.329", "wer": "70.497", "wps": "77.3", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "5000", "lr": "0.0002525", "gnorm": "2.106", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "10699"}
+[2024-06-15 03:50:19,993][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 04:32:03,802][valid][INFO] - {"epoch": 1, "valid_loss": "5.233", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "5.49416", "valid_wer_total": "18.1585", "valid_n_error": "12.6286", "valid_ppl": "37.62", "valid_accuracy": "30.257", "valid_wer": "69.547", "valid_wps": "174", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "5000", "valid_best_accuracy": "30.257"}
+[2024-06-15 04:32:03,803][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 5000 updates
+[2024-06-15 04:32:03,803][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_5000.pt
+[2024-06-15 04:32:06,855][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_5000.pt
+[2024-06-15 04:32:10,884][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_5000.pt (epoch 1 @ 5000 updates, score 30.257) (writing took 7.081204656002228 seconds)
+[2024-06-15 04:37:38,555][train_inner][INFO] - {"epoch": 1, "update": 0.345, "loss": "5.211", "ntokens": "126.51", "acc_total": "126.51", "n_correct": "38.9", "wer_total": "126.51", "n_error": "87.38", "ppl": "37.05", "accuracy": "30.749", "wer": "69.07", "wps": "8.9", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "5200", "lr": "0.0002624", "gnorm": "2.145", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "13538"}
+[2024-06-15 04:43:06,135][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "5.241", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "39.265", "wer_total": "127.425", "n_error": "87.925", "ppl": "37.82", "accuracy": "30.814", "wer": "69.001", "wps": "77.8", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "5400", "lr": "0.0002723", "gnorm": "2.165", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "13865"}
+[2024-06-15 04:48:34,257][train_inner][INFO] - {"epoch": 1, "update": 0.371, "loss": "5.172", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "40.005", "wer_total": "127.52", "n_error": "87.32", "ppl": "36.05", "accuracy": "31.372", "wer": "68.476", "wps": "77.7", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "5600", "lr": "0.0002822", "gnorm": "2.246", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "14193"}
+[2024-06-15 04:54:02,100][train_inner][INFO] - {"epoch": 1, "update": 0.384, "loss": "5.259", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "38.805", "wer_total": "126.59", "n_error": "87.57", "ppl": "38.29", "accuracy": "30.654", "wer": "69.176", "wps": "77.2", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "5800", "lr": "0.0002921", "gnorm": "2.254", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "14521"}
+[2024-06-15 04:59:29,936][train_inner][INFO] - {"epoch": 1, "update": 0.398, "loss": "5.138", "ntokens": "127.395", "acc_total": "127.395", "n_correct": "40.225", "wer_total": "127.395", "n_error": "86.99", "ppl": "35.2", "accuracy": "31.575", "wer": "68.284", "wps": "77.7", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "6000", "lr": "0.000302", "gnorm": "2.293", "loss_scale": "512", "train_wall": "327", "gb_free": "7.1", "wall": "14849"}
+[2024-06-15 05:04:57,817][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "5.077", "ntokens": "126.995", "acc_total": "126.995", "n_correct": "40.72", "wer_total": "126.995", "n_error": "86.06", "ppl": "33.76", "accuracy": "32.064", "wer": "67.766", "wps": "77.5", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "6200", "lr": "0.0003119", "gnorm": "2.334", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "15177"}
+[2024-06-15 05:10:25,543][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "5.079", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "40.595", "wer_total": "126.01", "n_error": "85.16", "ppl": "33.8", "accuracy": "32.216", "wer": "67.582", "wps": "76.9", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "6400", "lr": "0.0003218", "gnorm": "2.416", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "15505"}
+[2024-06-15 05:15:53,592][train_inner][INFO] - {"epoch": 1, "update": 0.437, "loss": "5.088", "ntokens": "126.33", "acc_total": "126.33", "n_correct": "40.64", "wer_total": "126.33", "n_error": "85.455", "ppl": "34.02", "accuracy": "32.17", "wer": "67.644", "wps": "77", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "6600", "lr": "0.0003317", "gnorm": "2.443", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "15833"}
+[2024-06-15 05:21:21,694][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "5.004", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "41.64", "wer_total": "126.735", "n_error": "84.865", "ppl": "32.09", "accuracy": "32.856", "wer": "66.963", "wps": "77.3", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "6800", "lr": "0.0003416", "gnorm": "2.415", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "16161"}
+[2024-06-15 05:26:50,062][train_inner][INFO] - {"epoch": 1, "update": 0.464, "loss": "4.967", "ntokens": "126.98", "acc_total": "126.98", "n_correct": "42.295", "wer_total": "126.98", "n_error": "84.44", "ppl": "31.27", "accuracy": "33.308", "wer": "66.499", "wps": "77.3", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "7000", "lr": "0.0003515", "gnorm": "2.439", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "16489"}
+[2024-06-15 05:32:17,794][train_inner][INFO] - {"epoch": 1, "update": 0.477, "loss": "5.046", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "41.965", "wer_total": "126.97", "n_error": "84.74", "ppl": "33.03", "accuracy": "33.051", "wer": "66.74", "wps": "77.5", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "7200", "lr": "0.0003614", "gnorm": "2.468", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "16817"}
+[2024-06-15 05:37:45,893][train_inner][INFO] - {"epoch": 1, "update": 0.491, "loss": "4.864", "ntokens": "127.45", "acc_total": "127.45", "n_correct": "43.48", "wer_total": "127.45", "n_error": "83.84", "ppl": "29.11", "accuracy": "34.115", "wer": "65.783", "wps": "77.7", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "7400", "lr": "0.0003713", "gnorm": "2.516", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "17145"}
+[2024-06-15 05:40:29,805][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 06:22:14,571][valid][INFO] - {"epoch": 1, "valid_loss": "4.745", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "6.38145", "valid_wer_total": "18.1585", "valid_n_error": "11.7444", "valid_ppl": "26.82", "valid_accuracy": "35.143", "valid_wer": "64.677", "valid_wps": "173.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "7500", "valid_best_accuracy": "35.143"}
+[2024-06-15 06:22:14,572][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 7500 updates
+[2024-06-15 06:22:14,572][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_7500.pt
+[2024-06-15 06:22:17,637][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_7500.pt
+[2024-06-15 06:22:22,127][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_7500.pt (epoch 1 @ 7500 updates, score 35.143) (writing took 7.555394832001184 seconds)
+[2024-06-15 06:25:05,820][train_inner][INFO] - {"epoch": 1, "update": 0.504, "loss": "4.928", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "42.855", "wer_total": "127.32", "n_error": "84.285", "ppl": "30.44", "accuracy": "33.659", "wer": "66.199", "wps": "9", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "7600", "lr": "0.0003812", "gnorm": "2.522", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "19985"}
+[2024-06-15 06:30:33,680][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "4.92", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "42.74", "wer_total": "127.11", "n_error": "84.13", "ppl": "30.28", "accuracy": "33.624", "wer": "66.187", "wps": "77.5", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "7800", "lr": "0.0003911", "gnorm": "2.624", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "20313"}
+[2024-06-15 06:36:00,672][train_inner][INFO] - {"epoch": 1, "update": 0.53, "loss": "4.876", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "43.295", "wer_total": "126.875", "n_error": "83.335", "ppl": "29.36", "accuracy": "34.124", "wer": "65.683", "wps": "77.6", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "8000", "lr": "0.000401", "gnorm": "2.689", "loss_scale": "1024", "train_wall": "326", "gb_free": "7.1", "wall": "20640"}
+[2024-06-15 06:41:28,018][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "4.824", "ntokens": "126.05", "acc_total": "126.05", "n_correct": "43.455", "wer_total": "126.05", "n_error": "82.415", "ppl": "28.33", "accuracy": "34.474", "wer": "65.383", "wps": "77", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "8200", "lr": "0.0004109", "gnorm": "2.749", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "20967"}
+[2024-06-15 06:46:55,297][train_inner][INFO] - {"epoch": 1, "update": 0.557, "loss": "4.811", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "44.405", "wer_total": "126.785", "n_error": "82.195", "ppl": "28.07", "accuracy": "35.024", "wer": "64.83", "wps": "77.5", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "8400", "lr": "0.0004208", "gnorm": "2.728", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "21294"}
+[2024-06-15 06:52:22,803][train_inner][INFO] - {"epoch": 1, "update": 0.57, "loss": "4.802", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "44.075", "wer_total": "126.675", "n_error": "82.47", "ppl": "27.89", "accuracy": "34.794", "wer": "65.104", "wps": "77.4", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "8600", "lr": "0.0004307", "gnorm": "2.78", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "21622"}
+[2024-06-15 06:57:50,350][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "4.753", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "45.335", "wer_total": "127.09", "n_error": "81.585", "ppl": "26.96", "accuracy": "35.672", "wer": "64.195", "wps": "77.6", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "8800", "lr": "0.0004406", "gnorm": "2.804", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "21949"}
+[2024-06-15 07:03:17,610][train_inner][INFO] - {"epoch": 1, "update": 0.597, "loss": "4.738", "ntokens": "127.43", "acc_total": "127.43", "n_correct": "45.315", "wer_total": "127.43", "n_error": "81.94", "ppl": "26.68", "accuracy": "35.561", "wer": "64.302", "wps": "77.9", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "9000", "lr": "0.0004505", "gnorm": "2.828", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "22277"}
+[2024-06-15 07:08:45,406][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "4.724", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "45.305", "wer_total": "127.145", "n_error": "81.675", "ppl": "26.43", "accuracy": "35.633", "wer": "64.238", "wps": "77.6", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "9200", "lr": "0.0004604", "gnorm": "2.847", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "22605"}
+[2024-06-15 07:14:13,843][train_inner][INFO] - {"epoch": 1, "update": 0.623, "loss": "4.756", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "44.545", "wer_total": "125.995", "n_error": "81.32", "ppl": "27.02", "accuracy": "35.355", "wer": "64.542", "wps": "76.7", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "9400", "lr": "0.0004703", "gnorm": "2.896", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "22933"}
+[2024-06-15 07:19:42,279][train_inner][INFO] - {"epoch": 1, "update": 0.636, "loss": "4.705", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "45.44", "wer_total": "126.87", "n_error": "81.195", "ppl": "26.08", "accuracy": "35.816", "wer": "63.999", "wps": "77.3", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "9600", "lr": "0.0004802", "gnorm": "2.908", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "23261"}
+[2024-06-15 07:25:10,873][train_inner][INFO] - {"epoch": 1, "update": 0.65, "loss": "4.664", "ntokens": "125.68", "acc_total": "125.68", "n_correct": "45.36", "wer_total": "125.68", "n_error": "80.145", "ppl": "25.36", "accuracy": "36.092", "wer": "63.769", "wps": "76.5", "ups": "0.61", "wpb": "125.7", "bsz": "8", "num_updates": "9800", "lr": "0.0004901", "gnorm": "2.981", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "23590"}
+[2024-06-15 07:30:39,588][train_inner][INFO] - {"epoch": 1, "update": 0.663, "loss": "4.657", "ntokens": "127.56", "acc_total": "127.56", "n_correct": "46.315", "wer_total": "127.56", "n_error": "81.04", "ppl": "25.23", "accuracy": "36.308", "wer": "63.531", "wps": "77.6", "ups": "0.61", "wpb": "127.6", "bsz": "8", "num_updates": "10000", "lr": "0.0005", "gnorm": "2.947", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "23919"}
+[2024-06-15 07:30:39,588][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 08:12:27,401][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "6.78612", "valid_wer_total": "18.1585", "valid_n_error": "11.3423", "valid_ppl": "nan", "valid_accuracy": "37.372", "valid_wer": "62.463", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "10000", "valid_best_accuracy": "37.372"}
+[2024-06-15 08:12:27,402][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 10000 updates
+[2024-06-15 08:12:27,402][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_10000.pt
+[2024-06-15 08:12:30,492][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_10000.pt
+[2024-06-15 08:12:35,047][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_10000.pt (epoch 1 @ 10000 updates, score 37.372) (writing took 7.64547687000595 seconds)
+[2024-06-15 08:18:02,795][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "4.645", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "45.85", "wer_total": "126.605", "n_error": "80.58", "ppl": "25.02", "accuracy": "36.215", "wer": "63.647", "wps": "8.9", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "10200", "lr": "0.000485243", "gnorm": "3.039", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "26762"}
+[2024-06-15 08:23:31,088][train_inner][INFO] - {"epoch": 1, "update": 0.689, "loss": "4.622", "ntokens": "127.295", "acc_total": "127.295", "n_correct": "46.085", "wer_total": "127.295", "n_error": "81.06", "ppl": "24.63", "accuracy": "36.203", "wer": "63.679", "wps": "77.5", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "10400", "lr": "0.000470922", "gnorm": "2.937", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "27090"}
+[2024-06-15 08:28:59,271][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "4.643", "ntokens": "127.73", "acc_total": "127.73", "n_correct": "46.63", "wer_total": "127.73", "n_error": "80.945", "ppl": "24.99", "accuracy": "36.507", "wer": "63.372", "wps": "77.8", "ups": "0.61", "wpb": "127.7", "bsz": "8", "num_updates": "10600", "lr": "0.000457024", "gnorm": "3.038", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "27418"}
+[2024-06-15 08:33:43,250][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
+[2024-06-15 08:34:29,263][train_inner][INFO] - {"epoch": 1, "update": 0.716, "loss": "4.582", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "47.03", "wer_total": "127.44", "n_error": "80.25", "ppl": "23.95", "accuracy": "36.904", "wer": "62.971", "wps": "77.2", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "10800", "lr": "0.000443536", "gnorm": "3.134", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "27748"}
+[2024-06-15 08:39:57,909][train_inner][INFO] - {"epoch": 1, "update": 0.729, "loss": "4.566", "ntokens": "126.09", "acc_total": "126.09", "n_correct": "46.735", "wer_total": "126.09", "n_error": "79.165", "ppl": "23.68", "accuracy": "37.065", "wer": "62.785", "wps": "76.7", "ups": "0.61", "wpb": "126.1", "bsz": "8", "num_updates": "11000", "lr": "0.000430446", "gnorm": "3.148", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "28077"}
+[2024-06-15 08:45:26,093][train_inner][INFO] - {"epoch": 1, "update": 0.742, "loss": "4.472", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "47.57", "wer_total": "126.625", "n_error": "78.885", "ppl": "22.19", "accuracy": "37.568", "wer": "62.298", "wps": "77.2", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "11200", "lr": "0.000417742", "gnorm": "3.088", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "28405"}
+[2024-06-15 08:50:54,150][train_inner][INFO] - {"epoch": 1, "update": 0.756, "loss": "4.469", "ntokens": "125.82", "acc_total": "125.82", "n_correct": "48.31", "wer_total": "125.82", "n_error": "77.4", "ppl": "22.14", "accuracy": "38.396", "wer": "61.516", "wps": "76.7", "ups": "0.61", "wpb": "125.8", "bsz": "8", "num_updates": "11400", "lr": "0.000405413", "gnorm": "3.111", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "28733"}
+[2024-06-15 08:56:22,194][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "4.457", "ntokens": "126.955", "acc_total": "126.955", "n_correct": "49.28", "wer_total": "126.955", "n_error": "77.545", "ppl": "21.96", "accuracy": "38.817", "wer": "61.081", "wps": "77.4", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "11600", "lr": "0.000393448", "gnorm": "3.091", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "29061"}
+[2024-06-15 09:01:50,299][train_inner][INFO] - {"epoch": 1, "update": 0.782, "loss": "4.438", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "49.115", "wer_total": "126.705", "n_error": "77.435", "ppl": "21.68", "accuracy": "38.763", "wer": "61.114", "wps": "77.2", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "11800", "lr": "0.000381836", "gnorm": "3.193", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "29389"}
+[2024-06-15 09:07:18,644][train_inner][INFO] - {"epoch": 1, "update": 0.796, "loss": "4.405", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "49.91", "wer_total": "126.675", "n_error": "76.635", "ppl": "21.18", "accuracy": "39.4", "wer": "60.497", "wps": "77.2", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "12000", "lr": "0.000370567", "gnorm": "3.159", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "29718"}
+[2024-06-15 09:12:46,852][train_inner][INFO] - {"epoch": 1, "update": 0.809, "loss": "4.445", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "48.58", "wer_total": "126.855", "n_error": "78.155", "ppl": "21.79", "accuracy": "38.296", "wer": "61.61", "wps": "77.3", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "12200", "lr": "0.000359631", "gnorm": "3.114", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "30046"}
+[2024-06-15 09:18:15,100][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "4.335", "ntokens": "126.96", "acc_total": "126.96", "n_correct": "50.295", "wer_total": "126.96", "n_error": "76.545", "ppl": "20.18", "accuracy": "39.615", "wer": "60.291", "wps": "77.4", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "12400", "lr": "0.000349017", "gnorm": "3.303", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "30374"}
+[2024-06-15 09:20:59,151][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 10:02:49,198][valid][INFO] - {"epoch": 1, "valid_loss": "4.157", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "7.54602", "valid_wer_total": "18.1585", "valid_n_error": "10.5966", "valid_ppl": "17.84", "valid_accuracy": "41.556", "valid_wer": "58.356", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "12500", "valid_best_accuracy": "41.556"}
+[2024-06-15 10:02:49,198][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12500 updates
+[2024-06-15 10:02:49,199][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12500.pt
+[2024-06-15 10:02:52,296][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12500.pt
+[2024-06-15 10:02:56,900][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12500.pt (epoch 1 @ 12500 updates, score 41.556) (writing took 7.702132188001997 seconds)
+[2024-06-15 10:05:41,050][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "4.299", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "50.21", "wer_total": "126.58", "n_error": "76.16", "ppl": "19.68", "accuracy": "39.667", "wer": "60.167", "wps": "8.9", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "12600", "lr": "0.000338716", "gnorm": "3.18", "loss_scale": "2048", "train_wall": "327", "gb_free": "7.1", "wall": "33220"}
+[2024-06-15 10:11:09,864][train_inner][INFO] - {"epoch": 1, "update": 0.849, "loss": "4.263", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "51.71", "wer_total": "127.335", "n_error": "75.485", "ppl": "19.19", "accuracy": "40.609", "wer": "59.281", "wps": "77.5", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "12800", "lr": "0.00032872", "gnorm": "3.097", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "33549"}
+[2024-06-15 10:16:38,835][train_inner][INFO] - {"epoch": 1, "update": 0.862, "loss": "4.286", "ntokens": "125.99", "acc_total": "125.99", "n_correct": "50.86", "wer_total": "125.99", "n_error": "74.965", "ppl": "19.51", "accuracy": "40.368", "wer": "59.501", "wps": "76.6", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "13000", "lr": "0.000319018", "gnorm": "3.164", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "33878"}
+[2024-06-15 10:22:07,667][train_inner][INFO] - {"epoch": 1, "update": 0.875, "loss": "4.294", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "51.445", "wer_total": "126.805", "n_error": "75.255", "ppl": "19.61", "accuracy": "40.57", "wer": "59.347", "wps": "77.1", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "13200", "lr": "0.000309603", "gnorm": "3.153", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "34207"}
+[2024-06-15 10:27:36,369][train_inner][INFO] - {"epoch": 1, "update": 0.888, "loss": "4.285", "ntokens": "127.435", "acc_total": "127.435", "n_correct": "51.365", "wer_total": "127.435", "n_error": "75.88", "ppl": "19.5", "accuracy": "40.307", "wer": "59.544", "wps": "77.5", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "13400", "lr": "0.000300466", "gnorm": "3.203", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "34535"}
+[2024-06-15 10:33:05,101][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "4.187", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "51.92", "wer_total": "127.015", "n_error": "74.94", "ppl": "18.22", "accuracy": "40.877", "wer": "59.001", "wps": "77.3", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "13600", "lr": "0.000291598", "gnorm": "3.184", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "34864"}
+[2024-06-15 10:38:33,858][train_inner][INFO] - {"epoch": 1, "update": 0.915, "loss": "4.136", "ntokens": "126.965", "acc_total": "126.965", "n_correct": "52.4", "wer_total": "126.965", "n_error": "74.45", "ppl": "17.59", "accuracy": "41.271", "wer": "58.638", "wps": "77.2", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "13800", "lr": "0.000282992", "gnorm": "3.112", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "35193"}
+[2024-06-15 10:44:02,817][train_inner][INFO] - {"epoch": 1, "update": 0.928, "loss": "4.194", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "52.18", "wer_total": "127.145", "n_error": "74.86", "ppl": "18.31", "accuracy": "41.04", "wer": "58.878", "wps": "77.3", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "14000", "lr": "0.00027464", "gnorm": "3.171", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "35522"}
+[2024-06-15 10:49:31,903][train_inner][INFO] - {"epoch": 1, "update": 0.941, "loss": "4.123", "ntokens": "125.985", "acc_total": "125.985", "n_correct": "52.37", "wer_total": "125.985", "n_error": "73.455", "ppl": "17.43", "accuracy": "41.568", "wer": "58.305", "wps": "76.6", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "14200", "lr": "0.000266535", "gnorm": "3.163", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "35851"}
+[2024-06-15 10:55:01,055][train_inner][INFO] - {"epoch": 1, "update": 0.955, "loss": "4.159", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "52.635", "wer_total": "126.93", "n_error": "74.17", "ppl": "17.87", "accuracy": "41.468", "wer": "58.434", "wps": "77.1", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "14400", "lr": "0.000258668", "gnorm": "3.167", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "36180"}
+[2024-06-15 11:00:30,248][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "4.072", "ntokens": "127.5", "acc_total": "127.5", "n_correct": "53.815", "wer_total": "127.5", "n_error": "73.54", "ppl": "16.82", "accuracy": "42.208", "wer": "57.678", "wps": "77.5", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "14600", "lr": "0.000251034", "gnorm": "3.16", "loss_scale": "4096", "train_wall": "329", "gb_free": "7.1", "wall": "36509"}
+[2024-06-15 11:05:59,133][train_inner][INFO] - {"epoch": 1, "update": 0.981, "loss": "4.135", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "53.82", "wer_total": "127.32", "n_error": "73.335", "ppl": "17.57", "accuracy": "42.271", "wer": "57.599", "wps": "77.4", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "14800", "lr": "0.000243626", "gnorm": "3.076", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "36838"}
+[2024-06-15 11:07:54,203][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4096.0
+[2024-06-15 11:11:29,547][train_inner][INFO] - {"epoch": 1, "update": 0.994, "loss": "4.106", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "53.96", "wer_total": "127.335", "n_error": "73.26", "ppl": "17.21", "accuracy": "42.376", "wer": "57.533", "wps": "77.1", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "15000", "lr": "0.000236435", "gnorm": "3.198", "loss_scale": "4096", "train_wall": "330", "gb_free": "7.1", "wall": "37169"}
+[2024-06-15 11:11:29,548][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 11:53:19,331][valid][INFO] - {"epoch": 1, "valid_loss": "3.869", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "8.30025", "valid_wer_total": "18.1585", "valid_n_error": "9.84531", "valid_ppl": "14.61", "valid_accuracy": "45.71", "valid_wer": "54.219", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "45.71"}
+[2024-06-15 11:53:19,332][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15000 updates
+[2024-06-15 11:53:19,332][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_15000.pt
+[2024-06-15 11:53:22,475][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_15000.pt
+[2024-06-15 11:53:27,056][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_15000.pt (epoch 1 @ 15000 updates, score 45.71) (writing took 7.724081863998435 seconds)
+[2024-06-15 11:55:44,650][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 12:37:40,852][valid][INFO] - {"epoch": 1, "valid_loss": "3.934", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "8.07703", "valid_wer_total": "18.1585", "valid_n_error": "10.0708", "valid_ppl": "15.29", "valid_accuracy": "44.481", "valid_wer": "55.461", "valid_wps": "173.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15084", "valid_best_accuracy": "45.71"}
+[2024-06-15 12:37:40,853][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15084 updates
+[2024-06-15 12:37:40,854][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_last.pt
+[2024-06-15 12:37:44,845][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_last.pt
+[2024-06-15 12:37:44,927][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 1 @ 15084 updates, score 44.481) (writing took 4.0730797330033965 seconds)
+[2024-06-15 12:37:44,927][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below)
+[2024-06-15 12:37:44,974][train][INFO] - {"epoch": 1, "train_loss": "5.024", "train_ntokens": "126.898", "train_acc_total": "126.898", "train_n_correct": "41.6332", "train_wer_total": "126.898", "train_n_error": "85.0752", "train_ppl": "32.53", "train_accuracy": "32.808", "train_wer": "67.042", "train_wps": "45.2", "train_ups": "0.36", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "15084", "train_lr": "0.000233479", "train_gnorm": "2.791", "train_loss_scale": "4096", "train_train_wall": "24685", "train_gb_free": "7.1", "train_wall": "42344"}
+[2024-06-15 12:37:45,036][fairseq.trainer][INFO] - begin training epoch 2
+[2024-06-15 12:37:45,036][fairseq_cli.train][INFO] - Start iterating over samples
+[2024-06-15 12:40:56,327][train_inner][INFO] - {"epoch": 2, "update": 1.008, "loss": "3.989", "ntokens": "127.05", "acc_total": "127.05", "n_correct": "55.155", "wer_total": "127.05", "n_error": "71.745", "ppl": "15.88", "accuracy": "43.412", "wer": "56.47", "wps": "4.7", "ups": "0.04", "wpb": "127", "bsz": "8", "num_updates": "15200", "lr": "0.000229457", "gnorm": "3.173", "loss_scale": "4096", "train_wall": "328", "gb_free": "7.1", "wall": "42535"}
+[2024-06-15 12:46:26,336][train_inner][INFO] - {"epoch": 2, "update": 1.021, "loss": "3.916", "ntokens": "126.32", "acc_total": "126.32", "n_correct": "55.42", "wer_total": "126.32", "n_error": "70.775", "ppl": "15.09", "accuracy": "43.873", "wer": "56.028", "wps": "76.6", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "15400", "lr": "0.000222685", "gnorm": "3.188", "loss_scale": "4096", "train_wall": "329", "gb_free": "7.1", "wall": "42865"}
+[2024-06-15 12:49:36,050][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
+[2024-06-15 12:51:57,888][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "3.852", "ntokens": "126.99", "acc_total": "126.99", "n_correct": "56.285", "wer_total": "126.99", "n_error": "70.59", "ppl": "14.44", "accuracy": "44.322", "wer": "55.587", "wps": "76.6", "ups": "0.6", "wpb": "127", "bsz": "8", "num_updates": "15600", "lr": "0.000216113", "gnorm": "3.202", "loss_scale": "2048", "train_wall": "331", "gb_free": "7.1", "wall": "43197"}
+[2024-06-15 12:57:27,900][train_inner][INFO] - {"epoch": 2, "update": 1.048, "loss": "3.939", "ntokens": "125.545", "acc_total": "125.545", "n_correct": "55.04", "wer_total": "125.545", "n_error": "70.435", "ppl": "15.34", "accuracy": "43.841", "wer": "56.103", "wps": "76.1", "ups": "0.61", "wpb": "125.5", "bsz": "8", "num_updates": "15800", "lr": "0.000209735", "gnorm": "3.164", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "43527"}
+[2024-06-15 13:02:58,399][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "3.912", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "55.825", "wer_total": "127.405", "n_error": "71.48", "ppl": "15.05", "accuracy": "43.817", "wer": "56.105", "wps": "77.1", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "16000", "lr": "0.000203545", "gnorm": "3.117", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "43857"}
+[2024-06-15 13:08:28,729][train_inner][INFO] - {"epoch": 2, "update": 1.074, "loss": "3.792", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "57.17", "wer_total": "126.915", "n_error": "69.665", "ppl": "13.86", "accuracy": "45.046", "wer": "54.891", "wps": "76.8", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "16200", "lr": "0.000197538", "gnorm": "3.212", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "44188"}
+[2024-06-15 13:13:58,953][train_inner][INFO] - {"epoch": 2, "update": 1.087, "loss": "3.824", "ntokens": "127.14", "acc_total": "127.14", "n_correct": "57.16", "wer_total": "127.14", "n_error": "69.875", "ppl": "14.16", "accuracy": "44.958", "wer": "54.959", "wps": "77", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "16400", "lr": "0.000191708", "gnorm": "3.17", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "44518"}
+[2024-06-15 13:19:29,290][train_inner][INFO] - {"epoch": 2, "update": 1.101, "loss": "3.815", "ntokens": "127.51", "acc_total": "127.51", "n_correct": "56.95", "wer_total": "127.51", "n_error": "70.48", "ppl": "14.08", "accuracy": "44.663", "wer": "55.274", "wps": "77.2", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "16600", "lr": "0.00018605", "gnorm": "3.267", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "44848"}
+[2024-06-15 13:24:59,424][train_inner][INFO] - {"epoch": 2, "update": 1.114, "loss": "3.866", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "56.575", "wer_total": "126.405", "n_error": "69.705", "ppl": "14.58", "accuracy": "44.757", "wer": "55.144", "wps": "76.6", "ups": "0.61", "wpb": "126.4", "bsz": "8", "num_updates": "16800", "lr": "0.000180559", "gnorm": "3.169", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "45179"}
+[2024-06-15 13:30:30,077][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "3.777", "ntokens": "127.925", "acc_total": "127.925", "n_correct": "57.855", "wer_total": "127.925", "n_error": "69.93", "ppl": "13.71", "accuracy": "45.226", "wer": "54.665", "wps": "77.4", "ups": "0.6", "wpb": "127.9", "bsz": "8", "num_updates": "17000", "lr": "0.00017523", "gnorm": "3.094", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "45509"}
+[2024-06-15 13:36:00,474][train_inner][INFO] - {"epoch": 2, "update": 1.14, "loss": "3.799", "ntokens": "126.125", "acc_total": "126.125", "n_correct": "56.745", "wer_total": "126.125", "n_error": "69.26", "ppl": "13.92", "accuracy": "44.991", "wer": "54.914", "wps": "76.3", "ups": "0.61", "wpb": "126.1", "bsz": "8", "num_updates": "17200", "lr": "0.000170059", "gnorm": "3.071", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "45840"}
+[2024-06-15 13:41:30,986][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "3.836", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "56.795", "wer_total": "127.245", "n_error": "70.305", "ppl": "14.28", "accuracy": "44.634", "wer": "55.252", "wps": "77", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "17400", "lr": "0.00016504", "gnorm": "3.196", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "46170"}
+[2024-06-15 13:44:16,283][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 14:26:22,025][valid][INFO] - {"epoch": 2, "valid_loss": "3.71", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "8.39466", "valid_wer_total": "18.1585", "valid_n_error": "9.75169", "valid_ppl": "13.09", "valid_accuracy": "46.23", "valid_wer": "53.703", "valid_wps": "172.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "17500", "valid_best_accuracy": "46.23"}
+[2024-06-15 14:26:22,026][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 17500 updates
+[2024-06-15 14:26:22,026][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_17500.pt
+[2024-06-15 14:26:25,119][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_17500.pt
+[2024-06-15 14:26:29,631][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_17500.pt (epoch 2 @ 17500 updates, score 46.23) (writing took 7.605056370986858 seconds)
+[2024-06-15 14:29:14,647][train_inner][INFO] - {"epoch": 2, "update": 1.167, "loss": "3.825", "ntokens": "126.2", "acc_total": "126.2", "n_correct": "56.37", "wer_total": "126.2", "n_error": "69.715", "ppl": "14.17", "accuracy": "44.667", "wer": "55.242", "wps": "8.8", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "17600", "lr": "0.000160169", "gnorm": "3.168", "loss_scale": "4096", "train_wall": "330", "gb_free": "7.1", "wall": "49034"}
+[2024-06-15 14:34:45,124][train_inner][INFO] - {"epoch": 2, "update": 1.18, "loss": "3.708", "ntokens": "128.29", "acc_total": "128.29", "n_correct": "58.2", "wer_total": "128.29", "n_error": "69.975", "ppl": "13.07", "accuracy": "45.366", "wer": "54.544", "wps": "77.6", "ups": "0.61", "wpb": "128.3", "bsz": "8", "num_updates": "17800", "lr": "0.000155442", "gnorm": "3.116", "loss_scale": "4096", "train_wall": "330", "gb_free": "7.1", "wall": "49364"}
+[2024-06-15 14:40:15,790][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "3.778", "ntokens": "126.235", "acc_total": "126.235", "n_correct": "57.005", "wer_total": "126.235", "n_error": "69.12", "ppl": "13.71", "accuracy": "45.158", "wer": "54.755", "wps": "76.4", "ups": "0.6", "wpb": "126.2", "bsz": "8", "num_updates": "18000", "lr": "0.000150854", "gnorm": "3.134", "loss_scale": "4096", "train_wall": "330", "gb_free": "7.1", "wall": "49695"}
+[2024-06-15 14:40:43,494][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
+[2024-06-15 14:46:24,845][train_inner][INFO] - {"epoch": 2, "update": 1.207, "loss": "3.704", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "59.155", "wer_total": "127.64", "n_error": "68.36", "ppl": "13.03", "accuracy": "46.345", "wer": "53.557", "wps": "69.2", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "18200", "lr": "0.000146402", "gnorm": "4.037", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "50064"}
+[2024-06-15 14:52:31,650][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "3.596", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "60.195", "wer_total": "126.27", "n_error": "65.985", "ppl": "12.09", "accuracy": "47.672", "wer": "52.257", "wps": "68.8", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "18400", "lr": "0.000142081", "gnorm": "4", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "50431"}
+[2024-06-15 14:58:38,777][train_inner][INFO] - {"epoch": 2, "update": 1.233, "loss": "3.618", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "60.02", "wer_total": "126.87", "n_error": "66.725", "ppl": "12.28", "accuracy": "47.308", "wer": "52.593", "wps": "69.1", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "18600", "lr": "0.000137888", "gnorm": "3.911", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "50798"}
+[2024-06-15 15:04:45,951][train_inner][INFO] - {"epoch": 2, "update": 1.246, "loss": "3.585", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "60.365", "wer_total": "127.155", "n_error": "66.69", "ppl": "12", "accuracy": "47.474", "wer": "52.448", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "18800", "lr": "0.000133819", "gnorm": "3.882", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "51165"}
+[2024-06-15 15:05:51,881][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-15 15:10:55,054][train_inner][INFO] - {"epoch": 2, "update": 1.26, "loss": "3.486", "ntokens": "127.67", "acc_total": "127.67", "n_correct": "61.87", "wer_total": "127.67", "n_error": "65.705", "ppl": "11.2", "accuracy": "48.461", "wer": "51.465", "wps": "69.2", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "19000", "lr": "0.000129869", "gnorm": "3.796", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "51534"}
+[2024-06-15 15:17:02,719][train_inner][INFO] - {"epoch": 2, "update": 1.273, "loss": "3.551", "ntokens": "127.085", "acc_total": "127.085", "n_correct": "61.405", "wer_total": "127.085", "n_error": "65.595", "ppl": "11.72", "accuracy": "48.318", "wer": "51.615", "wps": "69.1", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "19200", "lr": "0.000126036", "gnorm": "3.852", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "51902"}
+[2024-06-15 15:23:10,172][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "3.416", "ntokens": "125.85", "acc_total": "125.85", "n_correct": "62.16", "wer_total": "125.85", "n_error": "63.565", "ppl": "10.67", "accuracy": "49.392", "wer": "50.509", "wps": "68.5", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "19400", "lr": "0.000122317", "gnorm": "3.839", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "52269"}
+[2024-06-15 15:29:17,336][train_inner][INFO] - {"epoch": 2, "update": 1.3, "loss": "3.403", "ntokens": "125.665", "acc_total": "125.665", "n_correct": "62.6", "wer_total": "125.665", "n_error": "62.995", "ppl": "10.58", "accuracy": "49.815", "wer": "50.129", "wps": "68.5", "ups": "0.54", "wpb": "125.7", "bsz": "8", "num_updates": "19600", "lr": "0.000118707", "gnorm": "3.905", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "52636"}
+[2024-06-15 15:35:24,299][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "3.369", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "63.715", "wer_total": "127.615", "n_error": "63.77", "ppl": "10.33", "accuracy": "49.928", "wer": "49.971", "wps": "69.6", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "19800", "lr": "0.000115203", "gnorm": "3.849", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "53003"}
+[2024-06-15 15:41:31,714][train_inner][INFO] - {"epoch": 2, "update": 1.326, "loss": "3.41", "ntokens": "126.165", "acc_total": "126.165", "n_correct": "63.24", "wer_total": "126.165", "n_error": "62.86", "ppl": "10.63", "accuracy": "50.125", "wer": "49.824", "wps": "68.7", "ups": "0.54", "wpb": "126.2", "bsz": "8", "num_updates": "20000", "lr": "0.000111803", "gnorm": "3.831", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "53371"}
+[2024-06-15 15:41:31,715][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 16:23:36,861][valid][INFO] - {"epoch": 2, "valid_loss": "3.303", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "9.36219", "valid_wer_total": "18.1585", "valid_n_error": "8.78666", "valid_ppl": "9.87", "valid_accuracy": "51.558", "valid_wer": "48.389", "valid_wps": "172.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "20000", "valid_best_accuracy": "51.558"}
+[2024-06-15 16:23:36,862][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 20000 updates
+[2024-06-15 16:23:36,862][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_20000.pt
+[2024-06-15 16:23:39,976][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_20000.pt
+[2024-06-15 16:23:44,513][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_20000.pt (epoch 2 @ 20000 updates, score 51.558) (writing took 7.6514022090123035 seconds)
+[2024-06-15 16:29:51,067][train_inner][INFO] - {"epoch": 2, "update": 1.339, "loss": "3.417", "ntokens": "126.37", "acc_total": "126.37", "n_correct": "63.25", "wer_total": "126.37", "n_error": "63.01", "ppl": "10.68", "accuracy": "50.051", "wer": "49.862", "wps": "8.7", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "20200", "lr": "0.000108504", "gnorm": "3.781", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "56270"}
+[2024-06-15 16:35:57,981][train_inner][INFO] - {"epoch": 2, "update": 1.353, "loss": "3.456", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "63.395", "wer_total": "127.095", "n_error": "63.635", "ppl": "10.98", "accuracy": "49.88", "wer": "50.069", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "20400", "lr": "0.000105301", "gnorm": "3.821", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "56637"}
+[2024-06-15 16:42:04,351][train_inner][INFO] - {"epoch": 2, "update": 1.366, "loss": "3.345", "ntokens": "126.5", "acc_total": "126.5", "n_correct": "64.715", "wer_total": "126.5", "n_error": "61.725", "ppl": "10.16", "accuracy": "51.158", "wer": "48.794", "wps": "69.1", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "20600", "lr": "0.000102194", "gnorm": "3.739", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "57003"}
+[2024-06-15 16:48:11,054][train_inner][INFO] - {"epoch": 2, "update": 1.379, "loss": "3.321", "ntokens": "127.265", "acc_total": "127.265", "n_correct": "65.52", "wer_total": "127.265", "n_error": "61.65", "ppl": "10", "accuracy": "51.483", "wer": "48.442", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "20800", "lr": "9.91776e-05", "gnorm": "3.775", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "57370"}
+[2024-06-15 16:54:17,492][train_inner][INFO] - {"epoch": 2, "update": 1.392, "loss": "3.347", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "65.225", "wer_total": "127.405", "n_error": "62.115", "ppl": "10.18", "accuracy": "51.195", "wer": "48.754", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "21000", "lr": "9.62506e-05", "gnorm": "3.741", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "57737"}
+[2024-06-15 17:00:24,227][train_inner][INFO] - {"epoch": 2, "update": 1.406, "loss": "3.296", "ntokens": "126.7", "acc_total": "126.7", "n_correct": "64.905", "wer_total": "126.7", "n_error": "61.76", "ppl": "9.82", "accuracy": "51.227", "wer": "48.745", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "21200", "lr": "9.341e-05", "gnorm": "3.83", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "58103"}
+[2024-06-15 17:06:30,559][train_inner][INFO] - {"epoch": 2, "update": 1.419, "loss": "3.322", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "64.895", "wer_total": "127.32", "n_error": "62.35", "ppl": "10", "accuracy": "50.97", "wer": "48.971", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "21400", "lr": "9.06532e-05", "gnorm": "3.802", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "58470"}
+[2024-06-15 17:12:36,881][train_inner][INFO] - {"epoch": 2, "update": 1.432, "loss": "3.296", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "64.87", "wer_total": "126.805", "n_error": "61.84", "ppl": "9.82", "accuracy": "51.157", "wer": "48.768", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "21600", "lr": "8.79777e-05", "gnorm": "3.746", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "58836"}
+[2024-06-15 17:18:43,129][train_inner][INFO] - {"epoch": 2, "update": 1.445, "loss": "3.258", "ntokens": "125.555", "acc_total": "125.555", "n_correct": "64.77", "wer_total": "125.555", "n_error": "60.72", "ppl": "9.57", "accuracy": "51.587", "wer": "48.361", "wps": "68.6", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "21800", "lr": "8.53812e-05", "gnorm": "3.738", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "59202"}
+[2024-06-15 17:24:49,544][train_inner][INFO] - {"epoch": 2, "update": 1.459, "loss": "3.341", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "65.975", "wer_total": "127.64", "n_error": "61.58", "ppl": "10.13", "accuracy": "51.688", "wer": "48.245", "wps": "69.7", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "22000", "lr": "8.28614e-05", "gnorm": "3.777", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "59569"}
+[2024-06-15 17:30:56,010][train_inner][INFO] - {"epoch": 2, "update": 1.472, "loss": "3.186", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "66.4", "wer_total": "126.515", "n_error": "60.07", "ppl": "9.1", "accuracy": "52.484", "wer": "47.481", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "22200", "lr": "8.04159e-05", "gnorm": "3.794", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "59935"}
+[2024-06-15 17:37:02,111][train_inner][INFO] - {"epoch": 2, "update": 1.485, "loss": "3.225", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "66.67", "wer_total": "127.11", "n_error": "60.36", "ppl": "9.35", "accuracy": "52.451", "wer": "47.486", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "22400", "lr": "7.80425e-05", "gnorm": "3.73", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "60301"}
+[2024-06-15 17:40:05,040][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 18:22:05,253][valid][INFO] - {"epoch": 2, "valid_loss": "3.131", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "9.73314", "valid_wer_total": "18.1585", "valid_n_error": "8.41546", "valid_ppl": "8.76", "valid_accuracy": "53.601", "valid_wer": "46.345", "valid_wps": "172.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "22500", "valid_best_accuracy": "53.601"}
+[2024-06-15 18:22:05,253][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 22500 updates
+[2024-06-15 18:22:05,253][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_22500.pt
+[2024-06-15 18:22:08,349][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_22500.pt
+[2024-06-15 18:22:12,995][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_22500.pt (epoch 2 @ 22500 updates, score 53.601) (writing took 7.7415905879752245 seconds)
+[2024-06-15 18:25:15,506][train_inner][INFO] - {"epoch": 2, "update": 1.498, "loss": "3.221", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "67.3", "wer_total": "126.78", "n_error": "59.43", "ppl": "9.33", "accuracy": "53.084", "wer": "46.876", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "22600", "lr": "7.57393e-05", "gnorm": "3.821", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "63195"}
+[2024-06-15 18:28:55,083][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
+[2024-06-15 18:31:23,449][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "3.244", "ntokens": "126.64", "acc_total": "126.64", "n_correct": "66.505", "wer_total": "126.64", "n_error": "60.04", "ppl": "9.47", "accuracy": "52.515", "wer": "47.41", "wps": "68.8", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "22800", "lr": "7.3504e-05", "gnorm": "3.771", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "63563"}
+[2024-06-15 18:37:29,523][train_inner][INFO] - {"epoch": 2, "update": 1.525, "loss": "3.269", "ntokens": "126.155", "acc_total": "126.155", "n_correct": "66.6", "wer_total": "126.155", "n_error": "59.525", "ppl": "9.64", "accuracy": "52.792", "wer": "47.184", "wps": "68.9", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "23000", "lr": "7.13346e-05", "gnorm": "3.81", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "63929"}
+[2024-06-15 18:43:35,841][train_inner][INFO] - {"epoch": 2, "update": 1.538, "loss": "3.166", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "68.74", "wer_total": "126.97", "n_error": "58.145", "ppl": "8.98", "accuracy": "54.139", "wer": "45.794", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "23200", "lr": "6.92293e-05", "gnorm": "3.723", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "64295"}
+[2024-06-15 18:49:41,585][train_inner][INFO] - {"epoch": 2, "update": 1.552, "loss": "3.098", "ntokens": "126.29", "acc_total": "126.29", "n_correct": "68.515", "wer_total": "126.29", "n_error": "57.74", "ppl": "8.56", "accuracy": "54.252", "wer": "45.72", "wps": "69.1", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "23400", "lr": "6.71862e-05", "gnorm": "3.828", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "64661"}
+[2024-06-15 18:55:47,236][train_inner][INFO] - {"epoch": 2, "update": 1.565, "loss": "3.298", "ntokens": "126.595", "acc_total": "126.595", "n_correct": "66.37", "wer_total": "126.595", "n_error": "60.185", "ppl": "9.84", "accuracy": "52.427", "wer": "47.541", "wps": "69.2", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "23600", "lr": "6.52033e-05", "gnorm": "3.813", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "65026"}
+[2024-06-15 19:01:52,797][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "3.186", "ntokens": "127.175", "acc_total": "127.175", "n_correct": "68.385", "wer_total": "127.175", "n_error": "58.74", "ppl": "9.1", "accuracy": "53.772", "wer": "46.188", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "23800", "lr": "6.3279e-05", "gnorm": "3.773", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "65392"}
+[2024-06-15 19:07:58,141][train_inner][INFO] - {"epoch": 2, "update": 1.591, "loss": "3.173", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "68.5", "wer_total": "126.775", "n_error": "58.25", "ppl": "9.02", "accuracy": "54.033", "wer": "45.948", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "24000", "lr": "6.14114e-05", "gnorm": "3.754", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "65757"}
+[2024-06-15 19:14:03,725][train_inner][INFO] - {"epoch": 2, "update": 1.605, "loss": "3.119", "ntokens": "126.17", "acc_total": "126.17", "n_correct": "68.96", "wer_total": "126.17", "n_error": "57.165", "ppl": "8.69", "accuracy": "54.656", "wer": "45.308", "wps": "69", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "24200", "lr": "5.9599e-05", "gnorm": "3.771", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "66123"}
+[2024-06-15 19:20:09,217][train_inner][INFO] - {"epoch": 2, "update": 1.618, "loss": "3.154", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "68.555", "wer_total": "127.2", "n_error": "58.595", "ppl": "8.9", "accuracy": "53.895", "wer": "46.065", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "24400", "lr": "5.784e-05", "gnorm": "3.826", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "66488"}
+[2024-06-15 19:26:14,776][train_inner][INFO] - {"epoch": 2, "update": 1.631, "loss": "3.209", "ntokens": "127.38", "acc_total": "127.38", "n_correct": "67.34", "wer_total": "127.38", "n_error": "59.995", "ppl": "9.25", "accuracy": "52.865", "wer": "47.099", "wps": "69.7", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "24600", "lr": "5.6133e-05", "gnorm": "3.751", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "66854"}
+[2024-06-15 19:32:20,366][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "3.106", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "69.355", "wer_total": "127.26", "n_error": "57.855", "ppl": "8.61", "accuracy": "54.499", "wer": "45.462", "wps": "69.6", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "24800", "lr": "5.44763e-05", "gnorm": "3.78", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "67219"}
+[2024-06-15 19:38:26,343][train_inner][INFO] - {"epoch": 2, "update": 1.658, "loss": "3.193", "ntokens": "125.92", "acc_total": "125.92", "n_correct": "67.235", "wer_total": "125.92", "n_error": "58.615", "ppl": "9.15", "accuracy": "53.395", "wer": "46.549", "wps": "68.8", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "25000", "lr": "5.28686e-05", "gnorm": "3.758", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "67585"}
+[2024-06-15 19:38:26,343][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 20:20:28,477][valid][INFO] - {"epoch": 2, "valid_loss": "3.029", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "10.2083", "valid_wer_total": "18.1585", "valid_n_error": "7.9431", "valid_ppl": "8.16", "valid_accuracy": "56.218", "valid_wer": "43.743", "valid_wps": "172.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "25000", "valid_best_accuracy": "56.218"}
+[2024-06-15 20:20:28,477][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25000 updates
+[2024-06-15 20:20:28,478][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_25000.pt
+[2024-06-15 20:20:31,577][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_25000.pt
+[2024-06-15 20:20:36,233][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_25000.pt (epoch 2 @ 25000 updates, score 56.218) (writing took 7.755430594988866 seconds)
+[2024-06-15 20:26:41,751][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "3.067", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "70.06", "wer_total": "127.44", "n_error": "57.335", "ppl": "8.38", "accuracy": "54.975", "wer": "44.99", "wps": "8.8", "ups": "0.07", "wpb": "127.4", "bsz": "8", "num_updates": "25200", "lr": "5.13083e-05", "gnorm": "3.825", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "70481"}
+[2024-06-15 20:32:47,584][train_inner][INFO] - {"epoch": 2, "update": 1.684, "loss": "3.124", "ntokens": "125.36", "acc_total": "125.36", "n_correct": "68.16", "wer_total": "125.36", "n_error": "57.115", "ppl": "8.72", "accuracy": "54.371", "wer": "45.561", "wps": "68.5", "ups": "0.55", "wpb": "125.4", "bsz": "8", "num_updates": "25400", "lr": "4.9794e-05", "gnorm": "3.752", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "70847"}
+[2024-06-15 20:38:53,215][train_inner][INFO] - {"epoch": 2, "update": 1.697, "loss": "3.056", "ntokens": "127.07", "acc_total": "127.07", "n_correct": "68.69", "wer_total": "127.07", "n_error": "58.32", "ppl": "8.32", "accuracy": "54.057", "wer": "45.896", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25600", "lr": "4.83244e-05", "gnorm": "3.772", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "71212"}
+[2024-06-15 20:44:59,014][train_inner][INFO] - {"epoch": 2, "update": 1.711, "loss": "3.05", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "69.53", "wer_total": "127.055", "n_error": "57.46", "ppl": "8.28", "accuracy": "54.724", "wer": "45.225", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25800", "lr": "4.68982e-05", "gnorm": "3.75", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "71578"}
+[2024-06-15 20:51:04,571][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "3.091", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "69.135", "wer_total": "126.935", "n_error": "57.745", "ppl": "8.52", "accuracy": "54.465", "wer": "45.492", "wps": "69.4", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "26000", "lr": "4.55141e-05", "gnorm": "3.751", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "71944"}
+[2024-06-15 20:57:09,808][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "3.133", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "69.275", "wer_total": "126.305", "n_error": "57.005", "ppl": "8.77", "accuracy": "54.847", "wer": "45.133", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "26200", "lr": "4.41708e-05", "gnorm": "3.723", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "72309"}
+[2024-06-15 21:03:15,053][train_inner][INFO] - {"epoch": 2, "update": 1.75, "loss": "3.039", "ntokens": "127.47", "acc_total": "127.47", "n_correct": "70.465", "wer_total": "127.47", "n_error": "56.965", "ppl": "8.22", "accuracy": "55.28", "wer": "44.689", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "26400", "lr": "4.28672e-05", "gnorm": "3.721", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "72674"}
+[2024-06-15 21:09:20,113][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "3.045", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "68.91", "wer_total": "126.6", "n_error": "57.625", "ppl": "8.26", "accuracy": "54.431", "wer": "45.517", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "26600", "lr": "4.16021e-05", "gnorm": "3.773", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "73039"}
+[2024-06-15 21:15:24,791][train_inner][INFO] - {"epoch": 2, "update": 1.777, "loss": "2.986", "ntokens": "126.565", "acc_total": "126.565", "n_correct": "69.745", "wer_total": "126.565", "n_error": "56.76", "ppl": "7.92", "accuracy": "55.106", "wer": "44.847", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "26800", "lr": "4.03743e-05", "gnorm": "3.756", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "73404"}
+[2024-06-15 21:21:29,264][train_inner][INFO] - {"epoch": 2, "update": 1.79, "loss": "3.08", "ntokens": "127.69", "acc_total": "127.69", "n_correct": "69.645", "wer_total": "127.69", "n_error": "57.975", "ppl": "8.45", "accuracy": "54.542", "wer": "45.403", "wps": "70.1", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "27000", "lr": "3.91827e-05", "gnorm": "3.743", "loss_scale": "4096", "train_wall": "364", "gb_free": "6.5", "wall": "73768"}
+[2024-06-15 21:27:33,660][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "3.051", "ntokens": "127.97", "acc_total": "127.97", "n_correct": "70.395", "wer_total": "127.97", "n_error": "57.49", "ppl": "8.29", "accuracy": "55.009", "wer": "44.925", "wps": "70.2", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "27200", "lr": "3.80263e-05", "gnorm": "3.71", "loss_scale": "4096", "train_wall": "364", "gb_free": "6.5", "wall": "74133"}
+[2024-06-15 21:30:26,633][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
+[2024-06-15 21:33:39,845][train_inner][INFO] - {"epoch": 2, "update": 1.817, "loss": "3.119", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "69.2", "wer_total": "126.675", "n_error": "57.415", "ppl": "8.69", "accuracy": "54.628", "wer": "45.325", "wps": "69.2", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "27400", "lr": "3.6904e-05", "gnorm": "3.819", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "74499"}
+[2024-06-15 21:36:42,097][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-15 22:18:36,263][valid][INFO] - {"epoch": 2, "valid_loss": "2.937", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "10.1775", "valid_wer_total": "18.1585", "valid_n_error": "7.97278", "valid_ppl": "7.66", "valid_accuracy": "56.048", "valid_wer": "43.907", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "27500", "valid_best_accuracy": "56.218"}
+[2024-06-15 22:18:36,264][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 27500 updates
+[2024-06-15 22:18:36,264][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_27500.pt
+[2024-06-15 22:18:39,470][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_27500.pt
+[2024-06-15 22:18:41,886][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_27500.pt (epoch 2 @ 27500 updates, score 56.048) (writing took 5.621618739998667 seconds)
+[2024-06-15 22:21:43,623][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "3.039", "ntokens": "126.45", "acc_total": "126.45", "n_correct": "69.105", "wer_total": "126.45", "n_error": "57.275", "ppl": "8.22", "accuracy": "54.65", "wer": "45.295", "wps": "8.8", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "27600", "lr": "3.58149e-05", "gnorm": "3.756", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "77383"}
+[2024-06-15 22:27:47,691][train_inner][INFO] - {"epoch": 2, "update": 1.843, "loss": "3.061", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "69.745", "wer_total": "127.465", "n_error": "57.66", "ppl": "8.35", "accuracy": "54.717", "wer": "45.236", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "27800", "lr": "3.47579e-05", "gnorm": "3.727", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "77747"}
+[2024-06-15 22:33:51,437][train_inner][INFO] - {"epoch": 2, "update": 1.856, "loss": "3.037", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "70.505", "wer_total": "127.49", "n_error": "56.905", "ppl": "8.21", "accuracy": "55.302", "wer": "44.635", "wps": "70.1", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "28000", "lr": "3.37321e-05", "gnorm": "3.71", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "78111"}
+[2024-06-15 22:39:55,210][train_inner][INFO] - {"epoch": 2, "update": 1.87, "loss": "3.016", "ntokens": "127.75", "acc_total": "127.75", "n_correct": "70.755", "wer_total": "127.75", "n_error": "56.925", "ppl": "8.09", "accuracy": "55.386", "wer": "44.56", "wps": "70.2", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "28200", "lr": "3.27365e-05", "gnorm": "3.758", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "78474"}
+[2024-06-15 22:45:58,863][train_inner][INFO] - {"epoch": 2, "update": 1.883, "loss": "2.992", "ntokens": "127.125", "acc_total": "127.125", "n_correct": "69.96", "wer_total": "127.125", "n_error": "57.105", "ppl": "7.96", "accuracy": "55.032", "wer": "44.92", "wps": "69.9", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "28400", "lr": "3.17704e-05", "gnorm": "3.851", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "78838"}
+[2024-06-15 22:52:02,677][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "3.044", "ntokens": "128.015", "acc_total": "128.015", "n_correct": "71.02", "wer_total": "128.015", "n_error": "56.965", "ppl": "8.25", "accuracy": "55.478", "wer": "44.499", "wps": "70.4", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "28600", "lr": "3.08327e-05", "gnorm": "3.835", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "79202"}
+[2024-06-15 22:58:06,658][train_inner][INFO] - {"epoch": 2, "update": 1.91, "loss": "2.994", "ntokens": "126.755", "acc_total": "126.755", "n_correct": "70.175", "wer_total": "126.755", "n_error": "56.515", "ppl": "7.96", "accuracy": "55.363", "wer": "44.586", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "28800", "lr": "2.99228e-05", "gnorm": "3.736", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "79566"}
+[2024-06-15 23:04:10,640][train_inner][INFO] - {"epoch": 2, "update": 1.923, "loss": "2.957", "ntokens": "127.74", "acc_total": "127.74", "n_correct": "72.31", "wer_total": "127.74", "n_error": "55.395", "ppl": "7.77", "accuracy": "56.607", "wer": "43.365", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "29000", "lr": "2.90397e-05", "gnorm": "3.723", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "79930"}
+[2024-06-15 23:10:14,620][train_inner][INFO] - {"epoch": 2, "update": 1.936, "loss": "3.027", "ntokens": "126.765", "acc_total": "126.765", "n_correct": "70.3", "wer_total": "126.765", "n_error": "56.41", "ppl": "8.15", "accuracy": "55.457", "wer": "44.5", "wps": "69.7", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "29200", "lr": "2.81826e-05", "gnorm": "3.804", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "80294"}
+[2024-06-15 23:16:18,545][train_inner][INFO] - {"epoch": 2, "update": 1.949, "loss": "3.051", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "70.715", "wer_total": "127.17", "n_error": "56.435", "ppl": "8.29", "accuracy": "55.607", "wer": "44.378", "wps": "69.9", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "29400", "lr": "2.73509e-05", "gnorm": "3.761", "loss_scale": "4096", "train_wall": "363", "gb_free": "6.5", "wall": "80658"}
+[2024-06-15 23:22:22,933][train_inner][INFO] - {"epoch": 2, "update": 1.963, "loss": "2.951", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "71.52", "wer_total": "127.1", "n_error": "55.515", "ppl": "7.73", "accuracy": "56.271", "wer": "43.678", "wps": "69.8", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "29600", "lr": "2.65436e-05", "gnorm": "3.781", "loss_scale": "4096", "train_wall": "364", "gb_free": "6.5", "wall": "81022"}
+[2024-06-15 23:28:27,792][train_inner][INFO] - {"epoch": 2, "update": 1.976, "loss": "2.994", "ntokens": "126.885", "acc_total": "126.885", "n_correct": "71.245", "wer_total": "126.885", "n_error": "55.59", "ppl": "7.97", "accuracy": "56.149", "wer": "43.811", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "29800", "lr": "2.57603e-05", "gnorm": "3.778", "loss_scale": "4096", "train_wall": "364", "gb_free": "6.5", "wall": "81387"}
+[2024-06-15 23:34:08,766][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
+[2024-06-15 23:34:34,272][train_inner][INFO] - {"epoch": 2, "update": 1.989, "loss": "3.001", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "70.18", "wer_total": "126.935", "n_error": "56.71", "ppl": "8.01", "accuracy": "55.288", "wer": "44.676", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "30000", "lr": "2.5e-05", "gnorm": "3.752", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "81753"}
+[2024-06-15 23:34:34,272][fairseq_cli.train][INFO] - Stopping training due to num_updates: 30000 >= max_update: 30000
+[2024-06-15 23:34:34,272][fairseq_cli.train][INFO] - begin validation on "valid" subset
+[2024-06-16 00:16:26,410][valid][INFO] - {"epoch": 2, "valid_loss": "2.918", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "10.2358", "valid_wer_total": "18.1585", "valid_n_error": "7.91417", "valid_ppl": "7.56", "valid_accuracy": "56.369", "valid_wer": "43.584", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "56.369"}
+[2024-06-16 00:16:26,411][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 30000 updates
+[2024-06-16 00:16:26,411][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_30000.pt
+[2024-06-16 00:16:29,520][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_30000.pt
+[2024-06-16 00:16:33,910][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_30000.pt (epoch 2 @ 30000 updates, score 56.369) (writing took 7.499138368002605 seconds)
+[2024-06-16 00:16:34,081][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below)
+[2024-06-16 00:16:34,097][train][INFO] - {"epoch": 2, "train_loss": "3.33", "train_ntokens": "126.909", "train_acc_total": "126.909", "train_n_correct": "65.0846", "train_wer_total": "126.909", "train_n_error": "61.7495", "train_ppl": "10.05", "train_accuracy": "51.285", "train_wer": "48.657", "train_wps": "45.1", "train_ups": "0.36", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "30000", "train_lr": "2.5e-05", "train_gnorm": "3.667", "train_loss_scale": "2048", "train_train_wall": "26712", "train_gb_free": "6.5", "train_wall": "84273"}
+[2024-06-16 00:16:34,097][fairseq_cli.train][INFO] - done training in 84272.5 seconds