ViAVSP-LLM_v1.1.2 / hydra_train.log

Upload 10 files

2a54b5d verified 7 months ago

206 kB

	[2024-06-19 17:41:50,858][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 30000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 18000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_1_2', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_1_2', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video', 'audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 10000, 'hold_steps': 0, 'decay_steps': 20000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 30000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}}
	[2024-06-19 17:41:50,860][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.1.2
	[2024-06-19 17:41:50,860][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_1_2', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_1_2', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video', 'audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True}
	[2024-06-19 17:41:52,754][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.1.2
	[2024-06-19 17:41:52,754][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_1_2', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False}
	[2024-06-19 17:41:52,758][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True}
	[2024-06-19 17:41:59,669][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count(
	(encoder): HubertEncoderWrapper(
	(w2v_model): AVHubertModel(
	(feature_extractor_audio): SubModel(
	(proj): Linear(in_features=104, out_features=1024, bias=True)
	)
	(feature_extractor_video): SubModel(
	(resnet): ResEncoder(
	(frontend3D): Sequential(
	(0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False)
	(1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(2): PReLU(num_parameters=64)
	(3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
	)
	(trunk): ResNet(
	(layer1): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=64)
	(relu2): PReLU(num_parameters=64)
	(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	(1): BasicBlock(
	(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=64)
	(relu2): PReLU(num_parameters=64)
	(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer2): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=128)
	(relu2): PReLU(num_parameters=128)
	(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=128)
	(relu2): PReLU(num_parameters=128)
	(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer3): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=256)
	(relu2): PReLU(num_parameters=256)
	(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=256)
	(relu2): PReLU(num_parameters=256)
	(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer4): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=512)
	(relu2): PReLU(num_parameters=512)
	(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=512)
	(relu2): PReLU(num_parameters=512)
	(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(avgpool): AdaptiveAvgPool2d(output_size=1)
	)
	)
	(proj): Linear(in_features=512, out_features=1024, bias=True)
	)
	(post_extract_proj): Linear(in_features=2048, out_features=1024, bias=True)
	(dropout_input): Dropout(p=0.0, inplace=False)
	(dropout_features): Dropout(p=0.1, inplace=False)
	(encoder): TransformerEncoder(
	(pos_conv): Sequential(
	(0): Conv1d(1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16)
	(1): SamePad()
	(2): GELU(approximate='none')
	)
	(layers): ModuleList(
	(0-23): 24 x TransformerSentenceEncoderLayer(
	(self_attn): MultiheadAttention(
	(dropout_module): FairseqDropout()
	(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
	)
	(dropout1): Dropout(p=0.0, inplace=False)
	(dropout2): Dropout(p=0.1, inplace=False)
	(dropout3): Dropout(p=0.0, inplace=False)
	(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	(fc1): Linear(in_features=1024, out_features=4096, bias=True)
	(fc2): Linear(in_features=4096, out_features=1024, bias=True)
	(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	)
	)
	(layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	)
	(layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
	(final_proj): None
	)
	)
	(decoder): PeftModelForCausalLM(
	(base_model): LoraModel(
	(model): LlamaForCausalLM(
	(model): LlamaModel(
	(embed_tokens): Embedding(46304, 2560, padding_idx=0)
	(layers): ModuleList(
	(0-31): 32 x LlamaDecoderLayer(
	(self_attn): LlamaSdpaAttention(
	(q_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(k_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(v_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(rotary_emb): LlamaRotaryEmbedding()
	)
	(mlp): LlamaMLP(
	(gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
	(up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
	(down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False)
	(act_fn): SiLU()
	)
	(input_layernorm): LlamaRMSNorm()
	(post_attention_layernorm): LlamaRMSNorm()
	)
	)
	(norm): LlamaRMSNorm()
	)
	(lm_head): Linear(in_features=2560, out_features=46304, bias=False)
	)
	)
	)
	(avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True)
	)
	[2024-06-19 17:41:59,674][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask
	[2024-06-19 17:41:59,674][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count
	[2024-06-19 17:41:59,674][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss
	[2024-06-19 17:41:59,677][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424)
	[2024-06-19 17:41:59,680][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0)
	[2024-06-19 17:41:59,681][src.vsp_llm_training][INFO] - Using tokenizer
	[2024-06-19 17:41:59,720][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76
	[2024-06-19 17:42:00,058][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_1_2/valid.wrd is sequence label. skipped
	[2024-06-19 17:42:00,058][src.vsp_llm_dataset][INFO] - image transform: Compose(
	Normalize(mean=0.0, std=255.0)
	<src.utils_vsp_llm.CenterCrop object at 0x7ba3398c8340>
	Normalize(mean=0.421, std=0.165)
	)
	[2024-06-19 17:42:00,058][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
	[2024-06-19 17:42:00,058][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias
	[2024-06-19 17:42:00,214][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias
	[2024-06-19 17:42:00,215][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias
	[2024-06-19 17:42:00,216][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,217][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,218][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,219][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,220][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias
	[2024-06-19 17:42:00,221][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias
	[2024-06-19 17:42:00,222][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias
	[2024-06-19 17:42:00,222][fairseq.utils][INFO] - *********************CUDA enviroments for all 1 workers*********************
	[2024-06-19 17:42:00,222][fairseq.utils][INFO] - rank 0: capabilities = 8.6 ; total memory = 15.729 GB ; name = NVIDIA RTX A4000
	[2024-06-19 17:42:00,222][fairseq.utils][INFO] - *********************CUDA enviroments for all 1 workers*********************
	[2024-06-19 17:42:00,223][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs)
	[2024-06-19 17:42:00,223][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1
	[2024-06-19 17:42:00,223][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt
	[2024-06-19 17:42:00,223][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt
	[2024-06-19 17:42:00,223][fairseq.trainer][INFO] - loading train data for epoch 1
	[2024-06-19 17:42:00,223][src.vsp_llm_training][INFO] - Using tokenizer
	[2024-06-19 17:42:00,393][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 120686, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73
	[2024-06-19 17:42:00,771][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_1_2/train.wrd is sequence label. skipped
	[2024-06-19 17:42:00,771][src.vsp_llm_dataset][INFO] - image transform: Compose(
	Normalize(mean=0.0, std=255.0)
	RandomCrop(size=(88, 88))
	<src.utils_vsp_llm.HorizontalFlip object at 0x7ba339a29d00>
	Normalize(mean=0.421, std=0.165)
	)
	[2024-06-19 17:42:00,771][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
	[2024-06-19 17:42:00,771][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
	[2024-06-19 17:42:04,957][fairseq.trainer][INFO] - begin training epoch 1
	[2024-06-19 17:42:04,957][fairseq_cli.train][INFO] - Start iterating over samples
	[2024-06-19 17:47:32,754][train_inner][INFO] - {"epoch": 1, "update": 0.013, "loss": "7.666", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "18.025", "wer_total": "126.725", "n_error": "108.615", "ppl": "203.09", "accuracy": "14.224", "wer": "85.709", "wps": "77.5", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "200", "lr": "1.49e-05", "gnorm": "9.027", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "333"}
	[2024-06-19 17:53:02,822][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "6.202", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "25.71", "wer_total": "126.93", "n_error": "101.03", "ppl": "73.6", "accuracy": "20.255", "wer": "79.595", "wps": "76.9", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "400", "lr": "2.48e-05", "gnorm": "3.713", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "663"}
	[2024-06-19 17:58:32,882][train_inner][INFO] - {"epoch": 1, "update": 0.04, "loss": "6.073", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "28.67", "wer_total": "127.015", "n_error": "98.07", "ppl": "67.34", "accuracy": "22.572", "wer": "77.211", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "600", "lr": "3.47e-05", "gnorm": "3.951", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "993"}
	[2024-06-19 18:04:03,056][train_inner][INFO] - {"epoch": 1, "update": 0.053, "loss": "5.868", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "30.615", "wer_total": "126.865", "n_error": "96.015", "ppl": "58.4", "accuracy": "24.132", "wer": "75.683", "wps": "76.8", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "800", "lr": "4.46e-05", "gnorm": "4.137", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "1323"}
	[2024-06-19 18:09:33,095][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "5.932", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "30.64", "wer_total": "127.025", "n_error": "96.145", "ppl": "61.04", "accuracy": "24.121", "wer": "75.69", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "1000", "lr": "5.45e-05", "gnorm": "3.793", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "1653"}
	[2024-06-19 18:15:03,262][train_inner][INFO] - {"epoch": 1, "update": 0.08, "loss": "5.882", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "30.975", "wer_total": "127.095", "n_error": "95.87", "ppl": "58.96", "accuracy": "24.372", "wer": "75.432", "wps": "77", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "1200", "lr": "6.44e-05", "gnorm": "3.6", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "1983"}
	[2024-06-19 18:20:33,407][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "5.723", "ntokens": "127.62", "acc_total": "127.62", "n_correct": "32.255", "wer_total": "127.62", "n_error": "95.16", "ppl": "52.81", "accuracy": "25.274", "wer": "74.565", "wps": "77.3", "ups": "0.61", "wpb": "127.6", "bsz": "8", "num_updates": "1400", "lr": "7.43e-05", "gnorm": "3.409", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "2313"}
	[2024-06-19 18:26:03,836][train_inner][INFO] - {"epoch": 1, "update": 0.106, "loss": "5.735", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "32.6", "wer_total": "127.41", "n_error": "94.52", "ppl": "53.26", "accuracy": "25.587", "wer": "74.186", "wps": "77.1", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "1600", "lr": "8.42e-05", "gnorm": "3.171", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "2644"}
	[2024-06-19 18:31:34,157][train_inner][INFO] - {"epoch": 1, "update": 0.119, "loss": "5.777", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "32.165", "wer_total": "126.56", "n_error": "94.145", "ppl": "54.82", "accuracy": "25.415", "wer": "74.388", "wps": "76.6", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "1800", "lr": "9.41e-05", "gnorm": "2.975", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "2974"}
	[2024-06-19 18:37:04,371][train_inner][INFO] - {"epoch": 1, "update": 0.133, "loss": "5.678", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "33.505", "wer_total": "126.875", "n_error": "93.145", "ppl": "51.19", "accuracy": "26.408", "wer": "73.415", "wps": "76.8", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "2000", "lr": "0.000104", "gnorm": "2.812", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "3304"}
	[2024-06-19 18:42:34,647][train_inner][INFO] - {"epoch": 1, "update": 0.146, "loss": "5.597", "ntokens": "128.2", "acc_total": "128.2", "n_correct": "34.97", "wer_total": "128.2", "n_error": "92.91", "ppl": "48.41", "accuracy": "27.278", "wer": "72.473", "wps": "77.6", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "2200", "lr": "0.0001139", "gnorm": "2.752", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "3634"}
	[2024-06-19 18:48:04,831][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "5.492", "ntokens": "127.775", "acc_total": "127.775", "n_correct": "37.24", "wer_total": "127.775", "n_error": "90.305", "ppl": "45.01", "accuracy": "29.145", "wer": "70.675", "wps": "77.4", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "2400", "lr": "0.0001238", "gnorm": "2.899", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "3965"}
	[2024-06-19 18:50:49,847][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-19 19:32:44,384][valid][INFO] - {"epoch": 1, "valid_loss": "5.254", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "5.68654", "valid_wer_total": "18.1585", "valid_n_error": "12.4486", "valid_ppl": "38.16", "valid_accuracy": "31.316", "valid_wer": "68.555", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "2500"}
	[2024-06-19 19:32:44,385][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 2500 updates
	[2024-06-19 19:32:44,385][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_2500.pt
	[2024-06-19 19:32:47,638][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_2500.pt
	[2024-06-19 19:32:50,600][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_2500.pt (epoch 1 @ 2500 updates, score 31.316) (writing took 6.215355046035256 seconds)
	[2024-06-19 19:35:35,199][train_inner][INFO] - {"epoch": 1, "update": 0.172, "loss": "5.3", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "39.695", "wer_total": "126.92", "n_error": "86.87", "ppl": "39.39", "accuracy": "31.276", "wer": "68.445", "wps": "8.9", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "2600", "lr": "0.0001337", "gnorm": "3.27", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "6815"}
	[2024-06-19 19:41:04,945][train_inner][INFO] - {"epoch": 1, "update": 0.186, "loss": "5.147", "ntokens": "125.685", "acc_total": "125.685", "n_correct": "41.985", "wer_total": "125.685", "n_error": "83.46", "ppl": "35.43", "accuracy": "33.405", "wer": "66.404", "wps": "76.2", "ups": "0.61", "wpb": "125.7", "bsz": "8", "num_updates": "2800", "lr": "0.0001436", "gnorm": "3.724", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "7145"}
	[2024-06-19 19:46:34,738][train_inner][INFO] - {"epoch": 1, "update": 0.199, "loss": "4.893", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "46.89", "wer_total": "127.19", "n_error": "80.035", "ppl": "29.72", "accuracy": "36.866", "wer": "62.926", "wps": "77.1", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "3000", "lr": "0.0001535", "gnorm": "3.988", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "7475"}
	[2024-06-19 19:52:04,380][train_inner][INFO] - {"epoch": 1, "update": 0.212, "loss": "4.619", "ntokens": "126.535", "acc_total": "126.535", "n_correct": "50.435", "wer_total": "126.535", "n_error": "75.86", "ppl": "24.58", "accuracy": "39.859", "wer": "59.952", "wps": "76.8", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "3200", "lr": "0.0001634", "gnorm": "4.198", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "7804"}
	[2024-06-19 19:57:34,101][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "4.456", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "53.02", "wer_total": "126.53", "n_error": "73.335", "ppl": "21.94", "accuracy": "41.903", "wer": "57.959", "wps": "76.7", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "3400", "lr": "0.0001733", "gnorm": "4.314", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "8134"}
	[2024-06-19 20:03:03,739][train_inner][INFO] - {"epoch": 1, "update": 0.239, "loss": "4.214", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "55.795", "wer_total": "127.025", "n_error": "71.035", "ppl": "18.56", "accuracy": "43.924", "wer": "55.922", "wps": "77.1", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "3600", "lr": "0.0001832", "gnorm": "4.407", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "8464"}
	[2024-06-19 20:08:33,531][train_inner][INFO] - {"epoch": 1, "update": 0.252, "loss": "4.057", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "58.035", "wer_total": "127.35", "n_error": "69.11", "ppl": "16.64", "accuracy": "45.571", "wer": "54.268", "wps": "77.2", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "3800", "lr": "0.0001931", "gnorm": "4.417", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "8793"}
	[2024-06-19 20:14:03,352][train_inner][INFO] - {"epoch": 1, "update": 0.265, "loss": "3.921", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "59.98", "wer_total": "127.785", "n_error": "67.605", "ppl": "15.15", "accuracy": "46.938", "wer": "52.905", "wps": "77.5", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "4000", "lr": "0.000203", "gnorm": "4.452", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "9123"}
	[2024-06-19 20:19:33,303][train_inner][INFO] - {"epoch": 1, "update": 0.278, "loss": "3.768", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "61.415", "wer_total": "126.255", "n_error": "64.635", "ppl": "13.62", "accuracy": "48.644", "wer": "51.194", "wps": "76.5", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "4200", "lr": "0.0002129", "gnorm": "4.451", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "9453"}
	[2024-06-19 20:25:03,138][train_inner][INFO] - {"epoch": 1, "update": 0.292, "loss": "3.656", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "62.475", "wer_total": "125.9", "n_error": "63.315", "ppl": "12.61", "accuracy": "49.623", "wer": "50.29", "wps": "76.3", "ups": "0.61", "wpb": "125.9", "bsz": "8", "num_updates": "4400", "lr": "0.0002228", "gnorm": "4.347", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "9783"}
	[2024-06-19 20:30:33,127][train_inner][INFO] - {"epoch": 1, "update": 0.305, "loss": "3.51", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "65.175", "wer_total": "127.885", "n_error": "62.61", "ppl": "11.39", "accuracy": "50.964", "wer": "48.958", "wps": "77.5", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "4600", "lr": "0.0002327", "gnorm": "4.336", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "10113"}
	[2024-06-19 20:36:02,893][train_inner][INFO] - {"epoch": 1, "update": 0.318, "loss": "3.425", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "65.52", "wer_total": "126.21", "n_error": "60.595", "ppl": "10.74", "accuracy": "51.913", "wer": "48.011", "wps": "76.5", "ups": "0.61", "wpb": "126.2", "bsz": "8", "num_updates": "4800", "lr": "0.0002426", "gnorm": "4.321", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "10443"}
	[2024-06-19 20:41:32,750][train_inner][INFO] - {"epoch": 1, "update": 0.331, "loss": "3.324", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "66.945", "wer_total": "126.87", "n_error": "59.79", "ppl": "10.02", "accuracy": "52.767", "wer": "47.127", "wps": "76.9", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "5000", "lr": "0.0002525", "gnorm": "4.26", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "10773"}
	[2024-06-19 20:41:32,751][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-19 21:23:27,265][valid][INFO] - {"epoch": 1, "valid_loss": "2.988", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "10.2995", "valid_wer_total": "18.1585", "valid_n_error": "7.84218", "valid_ppl": "7.94", "valid_accuracy": "56.72", "valid_wer": "43.187", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "5000", "valid_best_accuracy": "56.72"}
	[2024-06-19 21:23:27,266][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 5000 updates
	[2024-06-19 21:23:27,266][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_5000.pt
	[2024-06-19 21:23:30,451][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_5000.pt
	[2024-06-19 21:23:34,940][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_5000.pt (epoch 1 @ 5000 updates, score 56.72) (writing took 7.6741928230039775 seconds)
	[2024-06-19 21:29:04,558][train_inner][INFO] - {"epoch": 1, "update": 0.345, "loss": "3.106", "ntokens": "126.51", "acc_total": "126.51", "n_correct": "69.59", "wer_total": "126.51", "n_error": "56.795", "ppl": "8.61", "accuracy": "55.008", "wer": "44.894", "wps": "8.9", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "5200", "lr": "0.0002624", "gnorm": "4.15", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "13624"}
	[2024-06-19 21:34:34,469][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "3.134", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "69.375", "wer_total": "127.425", "n_error": "57.96", "ppl": "8.78", "accuracy": "54.444", "wer": "45.486", "wps": "77.2", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "5400", "lr": "0.0002723", "gnorm": "4.29", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "13954"}
	[2024-06-19 21:40:04,384][train_inner][INFO] - {"epoch": 1, "update": 0.371, "loss": "3.03", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "70.785", "wer_total": "127.52", "n_error": "56.615", "ppl": "8.17", "accuracy": "55.509", "wer": "44.397", "wps": "77.3", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "5600", "lr": "0.0002822", "gnorm": "4.233", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "14284"}
	[2024-06-19 21:45:34,151][train_inner][INFO] - {"epoch": 1, "update": 0.384, "loss": "3.108", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "69.65", "wer_total": "126.59", "n_error": "56.795", "ppl": "8.62", "accuracy": "55.02", "wer": "44.865", "wps": "76.8", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "5800", "lr": "0.0002921", "gnorm": "4.114", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "14614"}
	[2024-06-19 21:51:03,835][train_inner][INFO] - {"epoch": 1, "update": 0.398, "loss": "2.952", "ntokens": "127.395", "acc_total": "127.395", "n_correct": "71.77", "wer_total": "127.395", "n_error": "55.46", "ppl": "7.74", "accuracy": "56.337", "wer": "43.534", "wps": "77.3", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "6000", "lr": "0.000302", "gnorm": "4.049", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "14944"}
	[2024-06-19 21:56:33,543][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "2.872", "ntokens": "126.995", "acc_total": "126.995", "n_correct": "72.77", "wer_total": "126.995", "n_error": "54.105", "ppl": "7.32", "accuracy": "57.301", "wer": "42.604", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "6200", "lr": "0.0003119", "gnorm": "4.055", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "15273"}
	[2024-06-19 22:02:03,147][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "2.901", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "71.995", "wer_total": "126.01", "n_error": "53.885", "ppl": "7.47", "accuracy": "57.134", "wer": "42.762", "wps": "76.5", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "6400", "lr": "0.0003218", "gnorm": "3.95", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "15603"}
	[2024-06-19 22:07:32,790][train_inner][INFO] - {"epoch": 1, "update": 0.437, "loss": "2.818", "ntokens": "126.33", "acc_total": "126.33", "n_correct": "72.805", "wer_total": "126.33", "n_error": "53.385", "ppl": "7.05", "accuracy": "57.631", "wer": "42.258", "wps": "76.6", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "6600", "lr": "0.0003317", "gnorm": "4.139", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "15933"}
	[2024-06-19 22:13:02,505][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "2.77", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "73.575", "wer_total": "126.735", "n_error": "53.07", "ppl": "6.82", "accuracy": "58.054", "wer": "41.875", "wps": "76.9", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "6800", "lr": "0.0003416", "gnorm": "4.009", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "16262"}
	[2024-06-19 22:18:32,202][train_inner][INFO] - {"epoch": 1, "update": 0.464, "loss": "2.668", "ntokens": "126.98", "acc_total": "126.98", "n_correct": "75.325", "wer_total": "126.98", "n_error": "51.54", "ppl": "6.36", "accuracy": "59.32", "wer": "40.589", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "7000", "lr": "0.0003515", "gnorm": "3.902", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "16592"}
	[2024-06-19 22:24:01,987][train_inner][INFO] - {"epoch": 1, "update": 0.477, "loss": "2.789", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "73.45", "wer_total": "126.97", "n_error": "53.38", "ppl": "6.91", "accuracy": "57.848", "wer": "42.041", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "7200", "lr": "0.0003614", "gnorm": "4.126", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "16922"}
	[2024-06-19 22:29:32,003][train_inner][INFO] - {"epoch": 1, "update": 0.491, "loss": "2.586", "ntokens": "127.45", "acc_total": "127.45", "n_correct": "76.89", "wer_total": "127.45", "n_error": "50.43", "ppl": "6", "accuracy": "60.33", "wer": "39.568", "wps": "77.2", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "7400", "lr": "0.0003713", "gnorm": "3.965", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "17252"}
	[2024-06-19 22:32:16,876][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-19 23:14:12,471][valid][INFO] - {"epoch": 1, "valid_loss": "2.397", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "11.3966", "valid_wer_total": "18.1585", "valid_n_error": "6.74048", "valid_ppl": "5.27", "valid_accuracy": "62.762", "valid_wer": "37.12", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "7500", "valid_best_accuracy": "62.762"}
	[2024-06-19 23:14:12,471][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 7500 updates
	[2024-06-19 23:14:12,472][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_7500.pt
	[2024-06-19 23:14:15,731][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_7500.pt
	[2024-06-19 23:14:20,254][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_7500.pt (epoch 1 @ 7500 updates, score 62.762) (writing took 7.782660273020156 seconds)
	[2024-06-19 23:17:04,876][train_inner][INFO] - {"epoch": 1, "update": 0.504, "loss": "2.619", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "76.09", "wer_total": "127.32", "n_error": "51.16", "ppl": "6.14", "accuracy": "59.763", "wer": "40.182", "wps": "8.9", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "7600", "lr": "0.0003812", "gnorm": "4.007", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "20105"}
	[2024-06-19 23:22:34,834][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "2.625", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "76.075", "wer_total": "127.11", "n_error": "50.9", "ppl": "6.17", "accuracy": "59.85", "wer": "40.044", "wps": "77", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "7800", "lr": "0.0003911", "gnorm": "3.853", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "20435"}
	[2024-06-19 23:28:04,713][train_inner][INFO] - {"epoch": 1, "update": 0.53, "loss": "2.583", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "76.46", "wer_total": "126.875", "n_error": "50.285", "ppl": "5.99", "accuracy": "60.264", "wer": "39.633", "wps": "76.9", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "8000", "lr": "0.000401", "gnorm": "3.763", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "20764"}
	[2024-06-19 23:33:34,370][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "2.548", "ntokens": "126.05", "acc_total": "126.05", "n_correct": "76.76", "wer_total": "126.05", "n_error": "49.185", "ppl": "5.85", "accuracy": "60.896", "wer": "39.02", "wps": "76.5", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "8200", "lr": "0.0004109", "gnorm": "3.952", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "21094"}
	[2024-06-19 23:39:04,067][train_inner][INFO] - {"epoch": 1, "update": 0.557, "loss": "2.491", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "78.205", "wer_total": "126.785", "n_error": "48.49", "ppl": "5.62", "accuracy": "61.683", "wer": "38.246", "wps": "76.9", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "8400", "lr": "0.0004208", "gnorm": "3.842", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "21424"}
	[2024-06-19 23:44:33,995][train_inner][INFO] - {"epoch": 1, "update": 0.57, "loss": "2.474", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "77.185", "wer_total": "126.675", "n_error": "49.4", "ppl": "5.55", "accuracy": "60.932", "wer": "38.997", "wps": "76.8", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "8600", "lr": "0.0004307", "gnorm": "3.86", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "21754"}
	[2024-06-19 23:50:03,810][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "2.515", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "77.425", "wer_total": "127.09", "n_error": "49.615", "ppl": "5.72", "accuracy": "60.921", "wer": "39.039", "wps": "77.1", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "8800", "lr": "0.0004406", "gnorm": "4.021", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "22084"}
	[2024-06-19 23:55:33,712][train_inner][INFO] - {"epoch": 1, "update": 0.597, "loss": "2.496", "ntokens": "127.43", "acc_total": "127.43", "n_correct": "78.25", "wer_total": "127.43", "n_error": "49.115", "ppl": "5.64", "accuracy": "61.406", "wer": "38.543", "wps": "77.3", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "9000", "lr": "0.0004505", "gnorm": "3.965", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "22413"}
	[2024-06-20 00:01:03,485][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "2.448", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "78.6", "wer_total": "127.145", "n_error": "48.42", "ppl": "5.46", "accuracy": "61.819", "wer": "38.083", "wps": "77.1", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "9200", "lr": "0.0004604", "gnorm": "3.935", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "22743"}
	[2024-06-20 00:06:33,313][train_inner][INFO] - {"epoch": 1, "update": 0.623, "loss": "2.442", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "77.815", "wer_total": "125.995", "n_error": "48.095", "ppl": "5.43", "accuracy": "61.76", "wer": "38.172", "wps": "76.4", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "9400", "lr": "0.0004703", "gnorm": "4.091", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "23073"}
	[2024-06-20 00:12:03,072][train_inner][INFO] - {"epoch": 1, "update": 0.636, "loss": "2.435", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "78.52", "wer_total": "126.87", "n_error": "48.25", "ppl": "5.41", "accuracy": "61.89", "wer": "38.031", "wps": "76.9", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "9600", "lr": "0.0004802", "gnorm": "3.954", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "23403"}
	[2024-06-20 00:17:32,899][train_inner][INFO] - {"epoch": 1, "update": 0.65, "loss": "2.397", "ntokens": "125.68", "acc_total": "125.68", "n_correct": "78.67", "wer_total": "125.68", "n_error": "46.915", "ppl": "5.27", "accuracy": "62.595", "wer": "37.329", "wps": "76.2", "ups": "0.61", "wpb": "125.7", "bsz": "8", "num_updates": "9800", "lr": "0.0004901", "gnorm": "4.136", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "23733"}
	[2024-06-20 00:23:02,621][train_inner][INFO] - {"epoch": 1, "update": 0.663, "loss": "2.393", "ntokens": "127.56", "acc_total": "127.56", "n_correct": "79.005", "wer_total": "127.56", "n_error": "48.445", "ppl": "5.25", "accuracy": "61.936", "wer": "37.978", "wps": "77.4", "ups": "0.61", "wpb": "127.6", "bsz": "8", "num_updates": "10000", "lr": "0.0005", "gnorm": "3.942", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "24062"}
	[2024-06-20 00:23:02,622][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 01:04:56,225][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "12.0113", "valid_wer_total": "18.1585", "valid_n_error": "6.13472", "valid_ppl": "nan", "valid_accuracy": "66.147", "valid_wer": "33.784", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "10000", "valid_best_accuracy": "66.147"}
	[2024-06-20 01:04:56,226][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 10000 updates
	[2024-06-20 01:04:56,226][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_10000.pt
	[2024-06-20 01:04:59,428][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_10000.pt
	[2024-06-20 01:05:03,968][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_10000.pt (epoch 1 @ 10000 updates, score 66.147) (writing took 7.742633692978416 seconds)
	[2024-06-20 01:08:31,203][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-20 01:10:34,835][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "2.396", "ntokens": "126.63", "acc_total": "126.63", "n_correct": "78.69", "wer_total": "126.63", "n_error": "47.855", "ppl": "5.26", "accuracy": "62.142", "wer": "37.791", "wps": "8.9", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "10200", "lr": "0.000485243", "gnorm": "4.213", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "26915"}
	[2024-06-20 01:16:04,683][train_inner][INFO] - {"epoch": 1, "update": 0.689, "loss": "2.355", "ntokens": "127.215", "acc_total": "127.215", "n_correct": "79.675", "wer_total": "127.215", "n_error": "47.45", "ppl": "5.12", "accuracy": "62.63", "wer": "37.299", "wps": "77.1", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "10400", "lr": "0.000470922", "gnorm": "3.945", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "27244"}
	[2024-06-20 01:21:34,424][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "2.363", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "80.515", "wer_total": "127.885", "n_error": "47.3", "ppl": "5.15", "accuracy": "62.959", "wer": "36.986", "wps": "77.6", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "10600", "lr": "0.000457024", "gnorm": "3.976", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "27574"}
	[2024-06-20 01:27:04,145][train_inner][INFO] - {"epoch": 1, "update": 0.716, "loss": "2.341", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "80.31", "wer_total": "127.33", "n_error": "46.925", "ppl": "5.07", "accuracy": "63.072", "wer": "36.853", "wps": "77.2", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "10800", "lr": "0.000443536", "gnorm": "3.94", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "27904"}
	[2024-06-20 01:32:34,065][train_inner][INFO] - {"epoch": 1, "update": 0.729, "loss": "2.24", "ntokens": "126.09", "acc_total": "126.09", "n_correct": "80.78", "wer_total": "126.09", "n_error": "45.235", "ppl": "4.73", "accuracy": "64.065", "wer": "35.875", "wps": "76.4", "ups": "0.61", "wpb": "126.1", "bsz": "8", "num_updates": "11000", "lr": "0.000430446", "gnorm": "3.885", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "28234"}
	[2024-06-20 01:38:03,698][train_inner][INFO] - {"epoch": 1, "update": 0.742, "loss": "2.19", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "82.515", "wer_total": "126.625", "n_error": "44.03", "ppl": "4.56", "accuracy": "65.165", "wer": "34.772", "wps": "76.8", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "11200", "lr": "0.000417742", "gnorm": "3.862", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "28563"}
	[2024-06-20 01:43:33,481][train_inner][INFO] - {"epoch": 1, "update": 0.756, "loss": "2.204", "ntokens": "125.82", "acc_total": "125.82", "n_correct": "83.04", "wer_total": "125.82", "n_error": "42.695", "ppl": "4.61", "accuracy": "65.999", "wer": "33.933", "wps": "76.3", "ups": "0.61", "wpb": "125.8", "bsz": "8", "num_updates": "11400", "lr": "0.000405413", "gnorm": "3.891", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "28893"}
	[2024-06-20 01:49:03,205][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "2.202", "ntokens": "126.955", "acc_total": "126.955", "n_correct": "85.175", "wer_total": "126.955", "n_error": "41.675", "ppl": "4.6", "accuracy": "67.091", "wer": "32.827", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "11600", "lr": "0.000393448", "gnorm": "3.87", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "29223"}
	[2024-06-20 01:54:32,979][train_inner][INFO] - {"epoch": 1, "update": 0.782, "loss": "2.156", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "84.635", "wer_total": "126.705", "n_error": "41.985", "ppl": "4.46", "accuracy": "66.797", "wer": "33.136", "wps": "76.8", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "11800", "lr": "0.000381836", "gnorm": "3.849", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "29553"}
	[2024-06-20 02:00:02,822][train_inner][INFO] - {"epoch": 1, "update": 0.796, "loss": "2.115", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "85.43", "wer_total": "126.675", "n_error": "41.205", "ppl": "4.33", "accuracy": "67.44", "wer": "32.528", "wps": "76.8", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "12000", "lr": "0.000370567", "gnorm": "3.664", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "29883"}
	[2024-06-20 02:05:32,802][train_inner][INFO] - {"epoch": 1, "update": 0.809, "loss": "2.126", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "86.38", "wer_total": "126.855", "n_error": "40.425", "ppl": "4.36", "accuracy": "68.093", "wer": "31.867", "wps": "76.9", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "12200", "lr": "0.000359631", "gnorm": "3.844", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "30213"}
	[2024-06-20 02:11:02,715][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "2.111", "ntokens": "126.96", "acc_total": "126.96", "n_correct": "85.225", "wer_total": "126.96", "n_error": "41.665", "ppl": "4.32", "accuracy": "67.127", "wer": "32.817", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "12400", "lr": "0.000349017", "gnorm": "3.774", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "30542"}
	[2024-06-20 02:13:47,468][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 02:55:42,081][valid][INFO] - {"epoch": 1, "valid_loss": "1.821", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "12.8109", "valid_wer_total": "18.1585", "valid_n_error": "5.33968", "valid_ppl": "3.53", "valid_accuracy": "70.551", "valid_wer": "29.406", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "12500", "valid_best_accuracy": "70.551"}
	[2024-06-20 02:55:42,081][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12500 updates
	[2024-06-20 02:55:42,082][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12500.pt
	[2024-06-20 02:55:45,306][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12500.pt
	[2024-06-20 02:55:49,778][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12500.pt (epoch 1 @ 12500 updates, score 70.551) (writing took 7.696423045010306 seconds)
	[2024-06-20 02:58:34,306][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "2.019", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "86.045", "wer_total": "126.58", "n_error": "40.5", "ppl": "4.05", "accuracy": "67.977", "wer": "31.996", "wps": "8.9", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "12600", "lr": "0.000338716", "gnorm": "3.567", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "33394"}
	[2024-06-20 03:04:04,324][train_inner][INFO] - {"epoch": 1, "update": 0.849, "loss": "2.004", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "85.725", "wer_total": "127.335", "n_error": "41.57", "ppl": "4.01", "accuracy": "67.322", "wer": "32.646", "wps": "77.2", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "12800", "lr": "0.00032872", "gnorm": "3.456", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "33724"}
	[2024-06-20 03:09:34,155][train_inner][INFO] - {"epoch": 1, "update": 0.862, "loss": "2.019", "ntokens": "125.99", "acc_total": "125.99", "n_correct": "86.18", "wer_total": "125.99", "n_error": "39.74", "ppl": "4.05", "accuracy": "68.402", "wer": "31.542", "wps": "76.4", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "13000", "lr": "0.000319018", "gnorm": "3.618", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "34054"}
	[2024-06-20 03:15:03,948][train_inner][INFO] - {"epoch": 1, "update": 0.875, "loss": "2.025", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "85.585", "wer_total": "126.805", "n_error": "41.15", "ppl": "4.07", "accuracy": "67.493", "wer": "32.451", "wps": "76.9", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "13200", "lr": "0.000309603", "gnorm": "3.572", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "34384"}
	[2024-06-20 03:20:33,829][train_inner][INFO] - {"epoch": 1, "update": 0.888, "loss": "2.025", "ntokens": "127.435", "acc_total": "127.435", "n_correct": "86.835", "wer_total": "127.435", "n_error": "40.55", "ppl": "4.07", "accuracy": "68.141", "wer": "31.82", "wps": "77.3", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "13400", "lr": "0.000300466", "gnorm": "3.549", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "34714"}
	[2024-06-20 03:26:03,548][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "1.927", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "87.05", "wer_total": "127.015", "n_error": "39.91", "ppl": "3.8", "accuracy": "68.535", "wer": "31.421", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "13600", "lr": "0.000291598", "gnorm": "3.228", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "35043"}
	[2024-06-20 03:29:31,312][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-20 03:31:34,976][train_inner][INFO] - {"epoch": 1, "update": 0.915, "loss": "1.884", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "88.5", "wer_total": "126.945", "n_error": "38.37", "ppl": "3.69", "accuracy": "69.715", "wer": "30.226", "wps": "76.6", "ups": "0.6", "wpb": "126.9", "bsz": "8", "num_updates": "13800", "lr": "0.000282992", "gnorm": "3.505", "loss_scale": "1024", "train_wall": "331", "gb_free": "7.1", "wall": "35375"}
	[2024-06-20 03:37:04,789][train_inner][INFO] - {"epoch": 1, "update": 0.928, "loss": "1.912", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "89.135", "wer_total": "127.135", "n_error": "37.955", "ppl": "3.76", "accuracy": "70.111", "wer": "29.854", "wps": "77.1", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "14000", "lr": "0.00027464", "gnorm": "3.471", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "35705"}
	[2024-06-20 03:42:34,609][train_inner][INFO] - {"epoch": 1, "update": 0.941, "loss": "1.885", "ntokens": "126.005", "acc_total": "126.005", "n_correct": "88.385", "wer_total": "126.005", "n_error": "37.59", "ppl": "3.69", "accuracy": "70.144", "wer": "29.832", "wps": "76.4", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "14200", "lr": "0.000266535", "gnorm": "3.665", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "36034"}
	[2024-06-20 03:48:04,481][train_inner][INFO] - {"epoch": 1, "update": 0.955, "loss": "1.883", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "88.72", "wer_total": "126.9", "n_error": "38.105", "ppl": "3.69", "accuracy": "69.913", "wer": "30.028", "wps": "76.9", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "14400", "lr": "0.000258668", "gnorm": "3.449", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "36364"}
	[2024-06-20 03:53:34,481][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "1.842", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "88.5", "wer_total": "127.485", "n_error": "38.945", "ppl": "3.59", "accuracy": "69.42", "wer": "30.549", "wps": "77.3", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "14600", "lr": "0.000251034", "gnorm": "3.447", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "36694"}
	[2024-06-20 03:59:04,292][train_inner][INFO] - {"epoch": 1, "update": 0.981, "loss": "1.874", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "87.835", "wer_total": "127.26", "n_error": "39.39", "ppl": "3.67", "accuracy": "69.02", "wer": "30.952", "wps": "77.2", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "14800", "lr": "0.000243626", "gnorm": "3.315", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "37024"}
	[2024-06-20 04:04:34,091][train_inner][INFO] - {"epoch": 1, "update": 0.994, "loss": "1.835", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "88.86", "wer_total": "127.385", "n_error": "38.49", "ppl": "3.57", "accuracy": "69.757", "wer": "30.215", "wps": "77.3", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "15000", "lr": "0.000236435", "gnorm": "3.295", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "37354"}
	[2024-06-20 04:04:34,092][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 04:46:26,738][valid][INFO] - {"epoch": 1, "valid_loss": "1.604", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.5669", "valid_wer_total": "18.1585", "valid_n_error": "4.58579", "valid_ppl": "3.04", "valid_accuracy": "74.714", "valid_wer": "25.254", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "74.714"}
	[2024-06-20 04:46:26,739][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15000 updates
	[2024-06-20 04:46:26,739][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_15000.pt
	[2024-06-20 04:46:29,941][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_15000.pt
	[2024-06-20 04:46:34,382][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_15000.pt (epoch 1 @ 15000 updates, score 74.714) (writing took 7.643271347042173 seconds)
	[2024-06-20 04:48:52,092][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 05:30:46,079][valid][INFO] - {"epoch": 1, "valid_loss": "1.606", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.8873", "valid_wer_total": "18.1585", "valid_n_error": "4.26632", "valid_ppl": "3.04", "valid_accuracy": "76.478", "valid_wer": "23.495", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15084", "valid_best_accuracy": "76.478"}
	[2024-06-20 05:30:46,079][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15084 updates
	[2024-06-20 05:30:46,080][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_best.pt
	[2024-06-20 05:30:49,963][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_best.pt
	[2024-06-20 05:30:52,414][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 1 @ 15084 updates, score 76.478) (writing took 6.33428728801664 seconds)
	[2024-06-20 05:30:52,414][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below)
	[2024-06-20 05:30:52,416][train][INFO] - {"epoch": 1, "train_loss": "3.259", "train_ntokens": "126.897", "train_acc_total": "126.897", "train_n_correct": "68.1392", "train_wer_total": "126.897", "train_n_error": "58.6288", "train_ppl": "9.57", "train_accuracy": "53.697", "train_wer": "46.202", "train_wps": "45", "train_ups": "0.35", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "15084", "train_lr": "0.000233479", "train_gnorm": "3.902", "train_loss_scale": "1024", "train_train_wall": "24826", "train_gb_free": "7.1", "train_wall": "42532"}
	[2024-06-20 05:30:52,461][fairseq.trainer][INFO] - begin training epoch 2
	[2024-06-20 05:30:52,461][fairseq_cli.train][INFO] - Start iterating over samples
	[2024-06-20 05:34:03,427][train_inner][INFO] - {"epoch": 2, "update": 1.008, "loss": "1.751", "ntokens": "127.05", "acc_total": "127.05", "n_correct": "92.78", "wer_total": "127.05", "n_error": "34.225", "ppl": "3.37", "accuracy": "73.026", "wer": "26.938", "wps": "4.7", "ups": "0.04", "wpb": "127", "bsz": "8", "num_updates": "15200", "lr": "0.000229457", "gnorm": "3.307", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "42723"}
	[2024-06-20 05:39:33,010][train_inner][INFO] - {"epoch": 2, "update": 1.021, "loss": "1.691", "ntokens": "126.32", "acc_total": "126.32", "n_correct": "91.095", "wer_total": "126.32", "n_error": "35.195", "ppl": "3.23", "accuracy": "72.114", "wer": "27.862", "wps": "76.7", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "15400", "lr": "0.000222685", "gnorm": "3.144", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "43053"}
	[2024-06-20 05:45:02,770][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "1.679", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "89.81", "wer_total": "127.015", "n_error": "37.18", "ppl": "3.2", "accuracy": "70.708", "wer": "29.272", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "15600", "lr": "0.000216113", "gnorm": "3.363", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "43383"}
	[2024-06-20 05:50:32,382][train_inner][INFO] - {"epoch": 2, "update": 1.047, "loss": "1.737", "ntokens": "125.485", "acc_total": "125.485", "n_correct": "88.5", "wer_total": "125.485", "n_error": "36.915", "ppl": "3.33", "accuracy": "70.526", "wer": "29.418", "wps": "76.1", "ups": "0.61", "wpb": "125.5", "bsz": "8", "num_updates": "15800", "lr": "0.000209735", "gnorm": "3.145", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "43712"}
	[2024-06-20 05:56:02,157][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "1.715", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "89.575", "wer_total": "127.465", "n_error": "37.835", "ppl": "3.28", "accuracy": "70.274", "wer": "29.683", "wps": "77.3", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "16000", "lr": "0.000203545", "gnorm": "3.223", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "44042"}
	[2024-06-20 06:01:31,964][train_inner][INFO] - {"epoch": 2, "update": 1.074, "loss": "1.576", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "91.145", "wer_total": "126.905", "n_error": "35.735", "ppl": "2.98", "accuracy": "71.821", "wer": "28.159", "wps": "77", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "16200", "lr": "0.000197538", "gnorm": "3.059", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "44372"}
	[2024-06-20 06:07:01,749][train_inner][INFO] - {"epoch": 2, "update": 1.087, "loss": "1.606", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "91.33", "wer_total": "127.11", "n_error": "35.73", "ppl": "3.04", "accuracy": "71.851", "wer": "28.11", "wps": "77.1", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "16400", "lr": "0.000191708", "gnorm": "3.144", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "44702"}
	[2024-06-20 06:12:31,310][train_inner][INFO] - {"epoch": 2, "update": 1.1, "loss": "1.598", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "91.5", "wer_total": "127.485", "n_error": "35.955", "ppl": "3.03", "accuracy": "71.773", "wer": "28.203", "wps": "77.4", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "16600", "lr": "0.00018605", "gnorm": "3.201", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "45031"}
	[2024-06-20 06:18:00,876][train_inner][INFO] - {"epoch": 2, "update": 1.114, "loss": "1.651", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "91.305", "wer_total": "126.46", "n_error": "35.135", "ppl": "3.14", "accuracy": "72.201", "wer": "27.783", "wps": "76.7", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "16800", "lr": "0.000180559", "gnorm": "3.043", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "45361"}
	[2024-06-20 06:23:30,481][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "1.562", "ntokens": "127.87", "acc_total": "127.87", "n_correct": "93.325", "wer_total": "127.87", "n_error": "34.52", "ppl": "2.95", "accuracy": "72.984", "wer": "26.996", "wps": "77.6", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "17000", "lr": "0.00017523", "gnorm": "3.036", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "45690"}
	[2024-06-20 06:29:00,063][train_inner][INFO] - {"epoch": 2, "update": 1.14, "loss": "1.597", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "92.515", "wer_total": "126.18", "n_error": "33.63", "ppl": "3.02", "accuracy": "73.32", "wer": "26.652", "wps": "76.6", "ups": "0.61", "wpb": "126.2", "bsz": "8", "num_updates": "17200", "lr": "0.000170059", "gnorm": "3.047", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "46020"}
	[2024-06-20 06:34:29,715][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "1.612", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "91.71", "wer_total": "127.2", "n_error": "35.46", "ppl": "3.06", "accuracy": "72.099", "wer": "27.877", "wps": "77.2", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "17400", "lr": "0.00016504", "gnorm": "3.103", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "46349"}
	[2024-06-20 06:37:14,541][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 07:19:08,977][valid][INFO] - {"epoch": 2, "valid_loss": "1.486", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.4737", "valid_wer_total": "18.1585", "valid_n_error": "4.67932", "valid_ppl": "2.8", "valid_accuracy": "74.201", "valid_wer": "25.769", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "17500", "valid_best_accuracy": "76.478"}
	[2024-06-20 07:19:08,978][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 17500 updates
	[2024-06-20 07:19:08,978][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_17500.pt
	[2024-06-20 07:19:12,163][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_17500.pt
	[2024-06-20 07:19:14,470][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_17500.pt (epoch 2 @ 17500 updates, score 74.201) (writing took 5.491555307991803 seconds)
	[2024-06-20 07:21:59,046][train_inner][INFO] - {"epoch": 2, "update": 1.167, "loss": "1.605", "ntokens": "126.22", "acc_total": "126.22", "n_correct": "91.66", "wer_total": "126.22", "n_error": "34.54", "ppl": "3.04", "accuracy": "72.619", "wer": "27.365", "wps": "8.9", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "17600", "lr": "0.000160169", "gnorm": "3.069", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "49199"}
	[2024-06-20 07:27:28,902][train_inner][INFO] - {"epoch": 2, "update": 1.18, "loss": "1.512", "ntokens": "128.305", "acc_total": "128.305", "n_correct": "93.265", "wer_total": "128.305", "n_error": "35.02", "ppl": "2.85", "accuracy": "72.69", "wer": "27.294", "wps": "77.8", "ups": "0.61", "wpb": "128.3", "bsz": "8", "num_updates": "17800", "lr": "0.000155442", "gnorm": "2.924", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "49529"}
	[2024-06-20 07:30:20,203][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
	[2024-06-20 07:33:00,065][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "1.59", "ntokens": "126.22", "acc_total": "126.22", "n_correct": "91.705", "wer_total": "126.22", "n_error": "34.475", "ppl": "3.01", "accuracy": "72.655", "wer": "27.313", "wps": "76.2", "ups": "0.6", "wpb": "126.2", "bsz": "8", "num_updates": "18000", "lr": "0.000150854", "gnorm": "2.96", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "49860"}
	[2024-06-20 07:33:18,404][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-20 07:39:07,451][train_inner][INFO] - {"epoch": 2, "update": 1.207, "loss": "1.634", "ntokens": "127.69", "acc_total": "127.69", "n_correct": "93.965", "wer_total": "127.69", "n_error": "33.685", "ppl": "3.1", "accuracy": "73.588", "wer": "26.38", "wps": "69.5", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "18200", "lr": "0.000146402", "gnorm": "10.056", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "50227"}
	[2024-06-20 07:45:12,834][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "1.56", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "94.83", "wer_total": "126.27", "n_error": "31.38", "ppl": "2.95", "accuracy": "75.101", "wer": "24.852", "wps": "69.1", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "18400", "lr": "0.000142081", "gnorm": "9.501", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "50593"}
	[2024-06-20 07:51:18,404][train_inner][INFO] - {"epoch": 2, "update": 1.233, "loss": "1.552", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "94.83", "wer_total": "126.87", "n_error": "32.015", "ppl": "2.93", "accuracy": "74.746", "wer": "25.234", "wps": "69.4", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "18600", "lr": "0.000137888", "gnorm": "9.335", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "50958"}
	[2024-06-20 07:57:23,697][train_inner][INFO] - {"epoch": 2, "update": 1.246, "loss": "1.493", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "95.495", "wer_total": "127.155", "n_error": "31.62", "ppl": "2.82", "accuracy": "75.101", "wer": "24.867", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "18800", "lr": "0.000133819", "gnorm": "8.996", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "51323"}
	[2024-06-20 08:03:29,241][train_inner][INFO] - {"epoch": 2, "update": 1.26, "loss": "1.464", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "96.17", "wer_total": "127.695", "n_error": "31.505", "ppl": "2.76", "accuracy": "75.312", "wer": "24.672", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "19000", "lr": "0.000129869", "gnorm": "8.798", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "51689"}
	[2024-06-20 08:09:01,904][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0
	[2024-06-20 08:09:36,611][train_inner][INFO] - {"epoch": 2, "update": 1.273, "loss": "1.525", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "96.75", "wer_total": "127.075", "n_error": "30.32", "ppl": "2.88", "accuracy": "76.136", "wer": "23.86", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "19200", "lr": "0.000126036", "gnorm": "8.692", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "52056"}
	[2024-06-20 08:15:42,193][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "1.378", "ntokens": "125.85", "acc_total": "125.85", "n_correct": "98.005", "wer_total": "125.85", "n_error": "27.825", "ppl": "2.6", "accuracy": "77.874", "wer": "22.11", "wps": "68.8", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "19400", "lr": "0.000122317", "gnorm": "8.134", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "52422"}
	[2024-06-20 08:21:47,508][train_inner][INFO] - {"epoch": 2, "update": 1.3, "loss": "1.416", "ntokens": "125.665", "acc_total": "125.665", "n_correct": "96.36", "wer_total": "125.665", "n_error": "29.295", "ppl": "2.67", "accuracy": "76.68", "wer": "23.312", "wps": "68.8", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "19600", "lr": "0.000118707", "gnorm": "8.634", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "52787"}
	[2024-06-20 08:27:52,918][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "1.378", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "100.195", "wer_total": "127.615", "n_error": "27.415", "ppl": "2.6", "accuracy": "78.513", "wer": "21.483", "wps": "69.8", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "19800", "lr": "0.000115203", "gnorm": "7.907", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "53153"}
	[2024-06-20 08:33:58,395][train_inner][INFO] - {"epoch": 2, "update": 1.326, "loss": "1.413", "ntokens": "126.165", "acc_total": "126.165", "n_correct": "98.19", "wer_total": "126.165", "n_error": "27.945", "ppl": "2.66", "accuracy": "77.827", "wer": "22.15", "wps": "69", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "20000", "lr": "0.000111803", "gnorm": "8.399", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "53518"}
	[2024-06-20 08:33:58,396][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 09:15:52,594][valid][INFO] - {"epoch": 2, "valid_loss": "1.292", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.4303", "valid_wer_total": "18.1585", "valid_n_error": "3.72584", "valid_ppl": "2.45", "valid_accuracy": "79.468", "valid_wer": "20.518", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "20000", "valid_best_accuracy": "79.468"}
	[2024-06-20 09:15:52,595][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 20000 updates
	[2024-06-20 09:15:52,595][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_20000.pt
	[2024-06-20 09:15:55,833][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_20000.pt
	[2024-06-20 09:16:00,144][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_20000.pt (epoch 2 @ 20000 updates, score 79.468) (writing took 7.548747716005892 seconds)
	[2024-06-20 09:22:05,421][train_inner][INFO] - {"epoch": 2, "update": 1.339, "loss": "1.408", "ntokens": "126.37", "acc_total": "126.37", "n_correct": "97.85", "wer_total": "126.37", "n_error": "28.515", "ppl": "2.65", "accuracy": "77.431", "wer": "22.565", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "20200", "lr": "0.000108504", "gnorm": "7.966", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "56405"}
	[2024-06-20 09:28:11,211][train_inner][INFO] - {"epoch": 2, "update": 1.353, "loss": "1.406", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "99.01", "wer_total": "127.095", "n_error": "28.065", "ppl": "2.65", "accuracy": "77.902", "wer": "22.082", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "20400", "lr": "0.000105301", "gnorm": "7.711", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "56771"}
	[2024-06-20 09:34:16,731][train_inner][INFO] - {"epoch": 2, "update": 1.366, "loss": "1.363", "ntokens": "126.5", "acc_total": "126.5", "n_correct": "98.375", "wer_total": "126.5", "n_error": "28.105", "ppl": "2.57", "accuracy": "77.767", "wer": "22.217", "wps": "69.2", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "20600", "lr": "0.000102194", "gnorm": "7.812", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "57137"}
	[2024-06-20 09:40:22,438][train_inner][INFO] - {"epoch": 2, "update": 1.379, "loss": "1.352", "ntokens": "127.265", "acc_total": "127.265", "n_correct": "98.28", "wer_total": "127.265", "n_error": "28.97", "ppl": "2.55", "accuracy": "77.225", "wer": "22.764", "wps": "69.6", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "20800", "lr": "9.91776e-05", "gnorm": "7.828", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "57502"}
	[2024-06-20 09:46:27,964][train_inner][INFO] - {"epoch": 2, "update": 1.392, "loss": "1.339", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "101.12", "wer_total": "127.405", "n_error": "26.265", "ppl": "2.53", "accuracy": "79.369", "wer": "20.615", "wps": "69.7", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "21000", "lr": "9.62506e-05", "gnorm": "7.545", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "57868"}
	[2024-06-20 09:52:33,781][train_inner][INFO] - {"epoch": 2, "update": 1.406, "loss": "1.311", "ntokens": "126.7", "acc_total": "126.7", "n_correct": "98.715", "wer_total": "126.7", "n_error": "27.975", "ppl": "2.48", "accuracy": "77.912", "wer": "22.08", "wps": "69.3", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "21200", "lr": "9.341e-05", "gnorm": "7.836", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "58234"}
	[2024-06-20 09:58:39,472][train_inner][INFO] - {"epoch": 2, "update": 1.419, "loss": "1.293", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "100.42", "wer_total": "127.32", "n_error": "26.88", "ppl": "2.45", "accuracy": "78.872", "wer": "21.112", "wps": "69.6", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "21400", "lr": "9.06532e-05", "gnorm": "7.333", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "58599"}
	[2024-06-20 10:04:45,211][train_inner][INFO] - {"epoch": 2, "update": 1.432, "loss": "1.313", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "98.925", "wer_total": "126.805", "n_error": "27.845", "ppl": "2.48", "accuracy": "78.013", "wer": "21.959", "wps": "69.3", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "21600", "lr": "8.79777e-05", "gnorm": "7.785", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "58965"}
	[2024-06-20 10:10:50,982][train_inner][INFO] - {"epoch": 2, "update": 1.445, "loss": "1.263", "ntokens": "125.555", "acc_total": "125.555", "n_correct": "101.335", "wer_total": "125.555", "n_error": "24.22", "ppl": "2.4", "accuracy": "80.71", "wer": "19.29", "wps": "68.7", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "21800", "lr": "8.53812e-05", "gnorm": "7.244", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "59331"}
	[2024-06-20 10:16:56,853][train_inner][INFO] - {"epoch": 2, "update": 1.459, "loss": "1.33", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "101.785", "wer_total": "127.64", "n_error": "25.835", "ppl": "2.51", "accuracy": "79.744", "wer": "20.241", "wps": "69.8", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "22000", "lr": "8.28614e-05", "gnorm": "7.649", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "59697"}
	[2024-06-20 10:23:02,733][train_inner][INFO] - {"epoch": 2, "update": 1.472, "loss": "1.24", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "102.64", "wer_total": "126.515", "n_error": "23.875", "ppl": "2.36", "accuracy": "81.129", "wer": "18.871", "wps": "69.2", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "22200", "lr": "8.04159e-05", "gnorm": "7.293", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "60063"}
	[2024-06-20 10:29:08,610][train_inner][INFO] - {"epoch": 2, "update": 1.485, "loss": "1.244", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "103.595", "wer_total": "127.11", "n_error": "23.505", "ppl": "2.37", "accuracy": "81.5", "wer": "18.492", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "22400", "lr": "7.80425e-05", "gnorm": "7.321", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "60428"}
	[2024-06-20 10:32:11,558][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 11:14:09,594][valid][INFO] - {"epoch": 2, "valid_loss": "1.151", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.9775", "valid_wer_total": "18.1585", "valid_n_error": "3.17937", "valid_ppl": "2.22", "valid_accuracy": "82.482", "valid_wer": "17.509", "valid_wps": "173", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "22500", "valid_best_accuracy": "82.482"}
	[2024-06-20 11:14:09,595][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 22500 updates
	[2024-06-20 11:14:09,595][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_22500.pt
	[2024-06-20 11:14:12,797][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_22500.pt
	[2024-06-20 11:14:17,172][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_22500.pt (epoch 2 @ 22500 updates, score 82.482) (writing took 7.577381226001307 seconds)
	[2024-06-20 11:17:19,738][train_inner][INFO] - {"epoch": 2, "update": 1.498, "loss": "1.279", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "102.34", "wer_total": "126.78", "n_error": "24.425", "ppl": "2.43", "accuracy": "80.723", "wer": "19.266", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "22600", "lr": "7.57393e-05", "gnorm": "7.46", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "63320"}
	[2024-06-20 11:23:25,729][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "1.247", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "102.115", "wer_total": "126.625", "n_error": "24.5", "ppl": "2.37", "accuracy": "80.644", "wer": "19.348", "wps": "69.2", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "22800", "lr": "7.3504e-05", "gnorm": "7.065", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "63686"}
	[2024-06-20 11:29:31,586][train_inner][INFO] - {"epoch": 2, "update": 1.525, "loss": "1.278", "ntokens": "126.17", "acc_total": "126.17", "n_correct": "101.715", "wer_total": "126.17", "n_error": "24.445", "ppl": "2.43", "accuracy": "80.617", "wer": "19.375", "wps": "69", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "23000", "lr": "7.13346e-05", "gnorm": "7.449", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "64051"}
	[2024-06-20 11:35:37,385][train_inner][INFO] - {"epoch": 2, "update": 1.538, "loss": "1.218", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "103.39", "wer_total": "126.86", "n_error": "23.46", "ppl": "2.33", "accuracy": "81.499", "wer": "18.493", "wps": "69.4", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "23200", "lr": "6.92293e-05", "gnorm": "7.176", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "64417"}
	[2024-06-20 11:41:43,181][train_inner][INFO] - {"epoch": 2, "update": 1.551, "loss": "1.164", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "103.705", "wer_total": "126.31", "n_error": "22.6", "ppl": "2.24", "accuracy": "82.104", "wer": "17.892", "wps": "69.1", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "23400", "lr": "6.71862e-05", "gnorm": "6.983", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "64783"}
	[2024-06-20 11:44:49,842][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-20 11:47:50,913][train_inner][INFO] - {"epoch": 2, "update": 1.565, "loss": "1.289", "ntokens": "126.615", "acc_total": "126.615", "n_correct": "100.855", "wer_total": "126.615", "n_error": "25.755", "ppl": "2.44", "accuracy": "79.655", "wer": "20.341", "wps": "68.9", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "23600", "lr": "6.52033e-05", "gnorm": "7.402", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "65151"}
	[2024-06-20 11:53:56,758][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "1.228", "ntokens": "127.175", "acc_total": "127.175", "n_correct": "102.175", "wer_total": "127.175", "n_error": "24.985", "ppl": "2.34", "accuracy": "80.342", "wer": "19.646", "wps": "69.5", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "23800", "lr": "6.3279e-05", "gnorm": "7.108", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "65517"}
	[2024-06-20 12:00:02,548][train_inner][INFO] - {"epoch": 2, "update": 1.591, "loss": "1.23", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "101.745", "wer_total": "126.775", "n_error": "25.025", "ppl": "2.34", "accuracy": "80.256", "wer": "19.74", "wps": "69.3", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "24000", "lr": "6.14114e-05", "gnorm": "7.351", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "65882"}
	[2024-06-20 12:06:08,516][train_inner][INFO] - {"epoch": 2, "update": 1.605, "loss": "1.203", "ntokens": "126.17", "acc_total": "126.17", "n_correct": "101.76", "wer_total": "126.17", "n_error": "24.4", "ppl": "2.3", "accuracy": "80.653", "wer": "19.339", "wps": "69", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "24200", "lr": "5.9599e-05", "gnorm": "7.014", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "66248"}
	[2024-06-20 12:12:14,382][train_inner][INFO] - {"epoch": 2, "update": 1.618, "loss": "1.21", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "102.445", "wer_total": "127.2", "n_error": "24.74", "ppl": "2.31", "accuracy": "80.539", "wer": "19.45", "wps": "69.5", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "24400", "lr": "5.784e-05", "gnorm": "7.329", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "66614"}
	[2024-06-20 12:18:20,346][train_inner][INFO] - {"epoch": 2, "update": 1.631, "loss": "1.269", "ntokens": "127.38", "acc_total": "127.38", "n_correct": "101.555", "wer_total": "127.38", "n_error": "25.805", "ppl": "2.41", "accuracy": "79.726", "wer": "20.258", "wps": "69.6", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "24600", "lr": "5.6133e-05", "gnorm": "6.988", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "66980"}
	[2024-06-20 12:24:26,461][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "1.186", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "102.765", "wer_total": "127.26", "n_error": "24.48", "ppl": "2.28", "accuracy": "80.752", "wer": "19.236", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "24800", "lr": "5.44763e-05", "gnorm": "6.8", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "67346"}
	[2024-06-20 12:30:32,442][train_inner][INFO] - {"epoch": 2, "update": 1.658, "loss": "1.216", "ntokens": "125.92", "acc_total": "125.92", "n_correct": "101.485", "wer_total": "125.92", "n_error": "24.405", "ppl": "2.32", "accuracy": "80.595", "wer": "19.381", "wps": "68.8", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "25000", "lr": "5.28686e-05", "gnorm": "6.899", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "67712"}
	[2024-06-20 12:30:32,442][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 13:12:31,068][valid][INFO] - {"epoch": 2, "valid_loss": "1.061", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.0593", "valid_wer_total": "18.1585", "valid_n_error": "3.09812", "valid_ppl": "2.09", "valid_accuracy": "82.932", "valid_wer": "17.062", "valid_wps": "173", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "25000", "valid_best_accuracy": "82.932"}
	[2024-06-20 13:12:31,068][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25000 updates
	[2024-06-20 13:12:31,068][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_25000.pt
	[2024-06-20 13:12:34,326][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_25000.pt
	[2024-06-20 13:12:38,690][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_25000.pt (epoch 2 @ 25000 updates, score 82.932) (writing took 7.6217346540652215 seconds)
	[2024-06-20 13:18:44,616][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "1.117", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "102.775", "wer_total": "127.44", "n_error": "24.655", "ppl": "2.17", "accuracy": "80.646", "wer": "19.346", "wps": "8.8", "ups": "0.07", "wpb": "127.4", "bsz": "8", "num_updates": "25200", "lr": "5.13083e-05", "gnorm": "6.663", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "70604"}
	[2024-06-20 13:24:50,968][train_inner][INFO] - {"epoch": 2, "update": 1.684, "loss": "1.184", "ntokens": "125.36", "acc_total": "125.36", "n_correct": "99.345", "wer_total": "125.36", "n_error": "25.99", "ppl": "2.27", "accuracy": "79.248", "wer": "20.732", "wps": "68.4", "ups": "0.55", "wpb": "125.4", "bsz": "8", "num_updates": "25400", "lr": "4.9794e-05", "gnorm": "7.131", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "70971"}
	[2024-06-20 13:30:57,224][train_inner][INFO] - {"epoch": 2, "update": 1.697, "loss": "1.112", "ntokens": "127.07", "acc_total": "127.07", "n_correct": "99.58", "wer_total": "127.07", "n_error": "27.485", "ppl": "2.16", "accuracy": "78.366", "wer": "21.63", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25600", "lr": "4.83244e-05", "gnorm": "6.818", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "71337"}
	[2024-06-20 13:37:03,527][train_inner][INFO] - {"epoch": 2, "update": 1.711, "loss": "1.13", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "100.63", "wer_total": "127.055", "n_error": "26.41", "ppl": "2.19", "accuracy": "79.202", "wer": "20.786", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25800", "lr": "4.68982e-05", "gnorm": "6.879", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "71703"}
	[2024-06-20 13:43:09,786][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "1.177", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "100.285", "wer_total": "126.935", "n_error": "26.64", "ppl": "2.26", "accuracy": "79.005", "wer": "20.987", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "26000", "lr": "4.55141e-05", "gnorm": "7.063", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "72070"}
	[2024-06-20 13:49:15,889][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "1.162", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "99.545", "wer_total": "126.305", "n_error": "26.76", "ppl": "2.24", "accuracy": "78.813", "wer": "21.187", "wps": "69", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "26200", "lr": "4.41708e-05", "gnorm": "7.121", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "72436"}
	[2024-06-20 13:55:22,161][train_inner][INFO] - {"epoch": 2, "update": 1.75, "loss": "1.141", "ntokens": "127.47", "acc_total": "127.47", "n_correct": "100.22", "wer_total": "127.47", "n_error": "27.23", "ppl": "2.21", "accuracy": "78.622", "wer": "21.362", "wps": "69.6", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "26400", "lr": "4.28672e-05", "gnorm": "6.651", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "72802"}
	[2024-06-20 14:01:28,533][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "1.107", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "101.5", "wer_total": "126.6", "n_error": "25.09", "ppl": "2.15", "accuracy": "80.174", "wer": "19.818", "wps": "69.1", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "26600", "lr": "4.16021e-05", "gnorm": "6.887", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "73168"}
	[2024-06-20 14:02:01,519][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-20 14:07:36,404][train_inner][INFO] - {"epoch": 2, "update": 1.777, "loss": "1.095", "ntokens": "126.575", "acc_total": "126.575", "n_correct": "103.23", "wer_total": "126.575", "n_error": "23.335", "ppl": "2.14", "accuracy": "81.556", "wer": "18.436", "wps": "68.8", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "26800", "lr": "4.03743e-05", "gnorm": "6.541", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "73536"}
	[2024-06-20 14:13:42,862][train_inner][INFO] - {"epoch": 2, "update": 1.79, "loss": "1.163", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "102.72", "wer_total": "127.595", "n_error": "24.855", "ppl": "2.24", "accuracy": "80.505", "wer": "19.48", "wps": "69.6", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "27000", "lr": "3.91827e-05", "gnorm": "6.803", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "73903"}
	[2024-06-20 14:19:49,285][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "1.149", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "103.795", "wer_total": "127.94", "n_error": "24.14", "ppl": "2.22", "accuracy": "81.128", "wer": "18.868", "wps": "69.8", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "27200", "lr": "3.80263e-05", "gnorm": "7.022", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "74269"}
	[2024-06-20 14:25:55,529][train_inner][INFO] - {"epoch": 2, "update": 1.817, "loss": "1.154", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "103.43", "wer_total": "126.68", "n_error": "23.25", "ppl": "2.23", "accuracy": "81.647", "wer": "18.353", "wps": "69.2", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "27400", "lr": "3.6904e-05", "gnorm": "6.707", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "74635"}
	[2024-06-20 14:28:58,982][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 15:10:58,032][valid][INFO] - {"epoch": 2, "valid_loss": "1.01", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.9402", "valid_wer_total": "18.1585", "valid_n_error": "3.21713", "valid_ppl": "2.01", "valid_accuracy": "82.277", "valid_wer": "17.717", "valid_wps": "172.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "27500", "valid_best_accuracy": "82.932"}
	[2024-06-20 15:10:58,033][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 27500 updates
	[2024-06-20 15:10:58,033][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_27500.pt
	[2024-06-20 15:11:01,242][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_27500.pt
	[2024-06-20 15:11:03,536][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_27500.pt (epoch 2 @ 27500 updates, score 82.277) (writing took 5.503226906992495 seconds)
	[2024-06-20 15:14:06,424][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "1.126", "ntokens": "126.45", "acc_total": "126.45", "n_correct": "101.425", "wer_total": "126.45", "n_error": "25.01", "ppl": "2.18", "accuracy": "80.21", "wer": "19.779", "wps": "8.7", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "27600", "lr": "3.58149e-05", "gnorm": "7.155", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "77526"}
	[2024-06-20 15:20:12,787][train_inner][INFO] - {"epoch": 2, "update": 1.843, "loss": "1.131", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "102.57", "wer_total": "127.465", "n_error": "24.885", "ppl": "2.19", "accuracy": "80.469", "wer": "19.523", "wps": "69.6", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "27800", "lr": "3.47579e-05", "gnorm": "6.758", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "77893"}
	[2024-06-20 15:26:19,032][train_inner][INFO] - {"epoch": 2, "update": 1.856, "loss": "1.149", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "101.705", "wer_total": "127.49", "n_error": "25.78", "ppl": "2.22", "accuracy": "79.775", "wer": "20.221", "wps": "69.6", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "28000", "lr": "3.37321e-05", "gnorm": "6.95", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "78259"}
	[2024-06-20 15:32:25,374][train_inner][INFO] - {"epoch": 2, "update": 1.87, "loss": "1.117", "ntokens": "127.75", "acc_total": "127.75", "n_correct": "102.48", "wer_total": "127.75", "n_error": "25.265", "ppl": "2.17", "accuracy": "80.219", "wer": "19.777", "wps": "69.7", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "28200", "lr": "3.27365e-05", "gnorm": "6.8", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "78625"}
	[2024-06-20 15:38:31,676][train_inner][INFO] - {"epoch": 2, "update": 1.883, "loss": "1.119", "ntokens": "127.125", "acc_total": "127.125", "n_correct": "102.725", "wer_total": "127.125", "n_error": "24.395", "ppl": "2.17", "accuracy": "80.806", "wer": "19.19", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "28400", "lr": "3.17704e-05", "gnorm": "6.81", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "78991"}
	[2024-06-20 15:44:37,942][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "1.122", "ntokens": "128.015", "acc_total": "128.015", "n_correct": "102.66", "wer_total": "128.015", "n_error": "25.325", "ppl": "2.18", "accuracy": "80.194", "wer": "19.783", "wps": "69.9", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "28600", "lr": "3.08327e-05", "gnorm": "6.672", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "79358"}
	[2024-06-20 15:49:47,361][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-20 15:50:46,001][train_inner][INFO] - {"epoch": 2, "update": 1.91, "loss": "1.103", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "102.11", "wer_total": "126.795", "n_error": "24.675", "ppl": "2.15", "accuracy": "80.532", "wer": "19.461", "wps": "68.9", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "28800", "lr": "2.99228e-05", "gnorm": "6.628", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "79726"}
	[2024-06-20 15:56:52,454][train_inner][INFO] - {"epoch": 2, "update": 1.923, "loss": "1.082", "ntokens": "127.77", "acc_total": "127.77", "n_correct": "103.11", "wer_total": "127.77", "n_error": "24.655", "ppl": "2.12", "accuracy": "80.7", "wer": "19.296", "wps": "69.7", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "29000", "lr": "2.90397e-05", "gnorm": "6.584", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "80092"}
	[2024-06-20 16:02:58,804][train_inner][INFO] - {"epoch": 2, "update": 1.936, "loss": "1.119", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "101.585", "wer_total": "126.785", "n_error": "25.18", "ppl": "2.17", "accuracy": "80.124", "wer": "19.86", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "29200", "lr": "2.81826e-05", "gnorm": "6.997", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "80459"}
	[2024-06-20 16:09:05,155][train_inner][INFO] - {"epoch": 2, "update": 1.949, "loss": "1.113", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "102.96", "wer_total": "127.185", "n_error": "24.22", "ppl": "2.16", "accuracy": "80.953", "wer": "19.043", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "29400", "lr": "2.73509e-05", "gnorm": "6.807", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "80825"}
	[2024-06-20 16:15:11,440][train_inner][INFO] - {"epoch": 2, "update": 1.963, "loss": "1.083", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "102.94", "wer_total": "127.1", "n_error": "24.155", "ppl": "2.12", "accuracy": "80.991", "wer": "19.005", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "29600", "lr": "2.65436e-05", "gnorm": "6.757", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "81191"}
	[2024-06-20 16:21:17,577][train_inner][INFO] - {"epoch": 2, "update": 1.976, "loss": "1.104", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "102.27", "wer_total": "126.785", "n_error": "24.51", "ppl": "2.15", "accuracy": "80.664", "wer": "19.332", "wps": "69.3", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "29800", "lr": "2.57603e-05", "gnorm": "6.856", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "81557"}
	[2024-06-20 16:27:23,984][train_inner][INFO] - {"epoch": 2, "update": 1.989, "loss": "1.102", "ntokens": "126.96", "acc_total": "126.96", "n_correct": "102.22", "wer_total": "126.96", "n_error": "24.73", "ppl": "2.15", "accuracy": "80.514", "wer": "19.479", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "30000", "lr": "2.5e-05", "gnorm": "6.53", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "81924"}
	[2024-06-20 16:27:23,984][fairseq_cli.train][INFO] - Stopping training due to num_updates: 30000 >= max_update: 30000
	[2024-06-20 16:27:23,984][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 17:09:25,847][valid][INFO] - {"epoch": 2, "valid_loss": "0.97", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.1493", "valid_wer_total": "18.1585", "valid_n_error": "3.00813", "valid_ppl": "1.96", "valid_accuracy": "83.428", "valid_wer": "16.566", "valid_wps": "172.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "83.428"}
	[2024-06-20 17:09:25,848][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 30000 updates
	[2024-06-20 17:09:25,848][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_30000.pt
	[2024-06-20 17:09:29,087][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_30000.pt
	[2024-06-20 17:09:33,430][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_30000.pt (epoch 2 @ 30000 updates, score 83.428) (writing took 7.58216175599955 seconds)
	[2024-06-20 17:09:33,460][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below)
	[2024-06-20 17:09:33,462][train][INFO] - {"epoch": 2, "train_loss": "1.322", "train_ntokens": "126.908", "train_acc_total": "126.908", "train_n_correct": "98.9054", "train_wer_total": "126.908", "train_n_error": "27.9842", "train_ppl": "2.5", "train_accuracy": "77.935", "train_wer": "22.051", "train_wps": "45.2", "train_ups": "0.36", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "30000", "train_lr": "2.5e-05", "train_gnorm": "6.554", "train_loss_scale": "1024", "train_train_wall": "26719", "train_gb_free": "6.5", "train_wall": "84453"}
	[2024-06-20 17:09:33,462][fairseq_cli.train][INFO] - done training in 84452.6 seconds