ViAVSP-LLM_v1.2.1 / hydra_train.log

Upload 10 files

2ceb986 verified 7 months ago

206 kB

	[2024-06-20 17:35:22,362][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 30000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 18000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_1', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_1', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video', 'audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 10000, 'hold_steps': 0, 'decay_steps': 20000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 30000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}}
	[2024-06-20 17:35:22,365][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.2.1
	[2024-06-20 17:35:22,365][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_1', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_1', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video', 'audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True}
	[2024-06-20 17:35:24,242][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.2.1
	[2024-06-20 17:35:24,242][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_1', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False}
	[2024-06-20 17:35:24,246][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True}
	[2024-06-20 17:35:30,910][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count(
	(encoder): HubertEncoderWrapper(
	(w2v_model): AVHubertModel(
	(feature_extractor_audio): SubModel(
	(proj): Linear(in_features=104, out_features=1024, bias=True)
	)
	(feature_extractor_video): SubModel(
	(resnet): ResEncoder(
	(frontend3D): Sequential(
	(0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False)
	(1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(2): PReLU(num_parameters=64)
	(3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
	)
	(trunk): ResNet(
	(layer1): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=64)
	(relu2): PReLU(num_parameters=64)
	(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	(1): BasicBlock(
	(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=64)
	(relu2): PReLU(num_parameters=64)
	(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer2): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=128)
	(relu2): PReLU(num_parameters=128)
	(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=128)
	(relu2): PReLU(num_parameters=128)
	(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer3): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=256)
	(relu2): PReLU(num_parameters=256)
	(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=256)
	(relu2): PReLU(num_parameters=256)
	(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer4): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=512)
	(relu2): PReLU(num_parameters=512)
	(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=512)
	(relu2): PReLU(num_parameters=512)
	(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(avgpool): AdaptiveAvgPool2d(output_size=1)
	)
	)
	(proj): Linear(in_features=512, out_features=1024, bias=True)
	)
	(post_extract_proj): Linear(in_features=2048, out_features=1024, bias=True)
	(dropout_input): Dropout(p=0.0, inplace=False)
	(dropout_features): Dropout(p=0.1, inplace=False)
	(encoder): TransformerEncoder(
	(pos_conv): Sequential(
	(0): Conv1d(1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16)
	(1): SamePad()
	(2): GELU(approximate='none')
	)
	(layers): ModuleList(
	(0-23): 24 x TransformerSentenceEncoderLayer(
	(self_attn): MultiheadAttention(
	(dropout_module): FairseqDropout()
	(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
	)
	(dropout1): Dropout(p=0.0, inplace=False)
	(dropout2): Dropout(p=0.1, inplace=False)
	(dropout3): Dropout(p=0.0, inplace=False)
	(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	(fc1): Linear(in_features=1024, out_features=4096, bias=True)
	(fc2): Linear(in_features=4096, out_features=1024, bias=True)
	(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	)
	)
	(layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	)
	(layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
	(final_proj): None
	)
	)
	(decoder): PeftModelForCausalLM(
	(base_model): LoraModel(
	(model): LlamaForCausalLM(
	(model): LlamaModel(
	(embed_tokens): Embedding(46304, 2560, padding_idx=0)
	(layers): ModuleList(
	(0-31): 32 x LlamaDecoderLayer(
	(self_attn): LlamaSdpaAttention(
	(q_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(k_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(v_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(rotary_emb): LlamaRotaryEmbedding()
	)
	(mlp): LlamaMLP(
	(gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
	(up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
	(down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False)
	(act_fn): SiLU()
	)
	(input_layernorm): LlamaRMSNorm()
	(post_attention_layernorm): LlamaRMSNorm()
	)
	)
	(norm): LlamaRMSNorm()
	)
	(lm_head): Linear(in_features=2560, out_features=46304, bias=False)
	)
	)
	)
	(avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True)
	)
	[2024-06-20 17:35:30,916][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask
	[2024-06-20 17:35:30,916][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count
	[2024-06-20 17:35:30,916][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss
	[2024-06-20 17:35:30,919][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424)
	[2024-06-20 17:35:30,921][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0)
	[2024-06-20 17:35:30,922][src.vsp_llm_training][INFO] - Using tokenizer
	[2024-06-20 17:35:30,960][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76
	[2024-06-20 17:35:31,312][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_1/valid.wrd is sequence label. skipped
	[2024-06-20 17:35:31,313][src.vsp_llm_dataset][INFO] - image transform: Compose(
	Normalize(mean=0.0, std=255.0)
	<src.utils_vsp_llm.CenterCrop object at 0x70dbf1005130>
	Normalize(mean=0.421, std=0.165)
	)
	[2024-06-20 17:35:31,313][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
	[2024-06-20 17:35:31,313][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias
	[2024-06-20 17:35:31,498][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias
	[2024-06-20 17:35:31,499][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,500][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,501][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,502][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,503][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,504][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias
	[2024-06-20 17:35:31,505][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias
	[2024-06-20 17:35:31,506][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias
	[2024-06-20 17:35:31,506][fairseq.utils][INFO] - *********************CUDA enviroments for all 1 workers*********************
	[2024-06-20 17:35:31,506][fairseq.utils][INFO] - rank 0: capabilities = 8.6 ; total memory = 15.729 GB ; name = NVIDIA RTX A4000
	[2024-06-20 17:35:31,506][fairseq.utils][INFO] - *********************CUDA enviroments for all 1 workers*********************
	[2024-06-20 17:35:31,506][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs)
	[2024-06-20 17:35:31,506][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1
	[2024-06-20 17:35:31,507][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt
	[2024-06-20 17:35:31,507][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt
	[2024-06-20 17:35:31,507][fairseq.trainer][INFO] - loading train data for epoch 1
	[2024-06-20 17:35:31,507][src.vsp_llm_training][INFO] - Using tokenizer
	[2024-06-20 17:35:31,680][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 120686, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73
	[2024-06-20 17:35:32,038][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_1/train.wrd is sequence label. skipped
	[2024-06-20 17:35:32,038][src.vsp_llm_dataset][INFO] - image transform: Compose(
	Normalize(mean=0.0, std=255.0)
	RandomCrop(size=(88, 88))
	<src.utils_vsp_llm.HorizontalFlip object at 0x70dbf1168c40>
	Normalize(mean=0.421, std=0.165)
	)
	[2024-06-20 17:35:32,038][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
	[2024-06-20 17:35:32,038][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
	[2024-06-20 17:35:35,847][fairseq.trainer][INFO] - begin training epoch 1
	[2024-06-20 17:35:35,847][fairseq_cli.train][INFO] - Start iterating over samples
	[2024-06-20 17:41:03,800][train_inner][INFO] - {"epoch": 1, "update": 0.013, "loss": "7.613", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "18.36", "wer_total": "126.725", "n_error": "108.275", "ppl": "195.78", "accuracy": "14.488", "wer": "85.441", "wps": "77.4", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "200", "lr": "1.49e-05", "gnorm": "8.776", "loss_scale": "128", "train_wall": "327", "gb_free": "7.1", "wall": "332"}
	[2024-06-20 17:46:33,409][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "6.197", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "25.67", "wer_total": "126.93", "n_error": "101.055", "ppl": "73.36", "accuracy": "20.224", "wer": "79.615", "wps": "77", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "400", "lr": "2.48e-05", "gnorm": "3.7", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "662"}
	[2024-06-20 17:52:02,973][train_inner][INFO] - {"epoch": 1, "update": 0.04, "loss": "6.076", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "28.56", "wer_total": "127.015", "n_error": "98.17", "ppl": "67.48", "accuracy": "22.486", "wer": "77.29", "wps": "77.1", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "600", "lr": "3.47e-05", "gnorm": "3.964", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "991"}
	[2024-06-20 17:57:32,690][train_inner][INFO] - {"epoch": 1, "update": 0.053, "loss": "5.869", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "30.65", "wer_total": "126.865", "n_error": "95.97", "ppl": "58.44", "accuracy": "24.16", "wer": "75.647", "wps": "77", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "800", "lr": "4.46e-05", "gnorm": "4.074", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "1321"}
	[2024-06-20 18:03:02,525][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "5.933", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "30.475", "wer_total": "127.025", "n_error": "96.315", "ppl": "61.09", "accuracy": "23.991", "wer": "75.824", "wps": "77", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "1000", "lr": "5.45e-05", "gnorm": "3.805", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "1651"}
	[2024-06-20 18:08:32,255][train_inner][INFO] - {"epoch": 1, "update": 0.08, "loss": "5.882", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "30.94", "wer_total": "127.095", "n_error": "95.93", "ppl": "58.99", "accuracy": "24.344", "wer": "75.479", "wps": "77.1", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "1200", "lr": "6.44e-05", "gnorm": "3.583", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "1981"}
	[2024-06-20 18:14:02,245][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "5.724", "ntokens": "127.62", "acc_total": "127.62", "n_correct": "32.22", "wer_total": "127.62", "n_error": "95.2", "ppl": "52.87", "accuracy": "25.247", "wer": "74.596", "wps": "77.3", "ups": "0.61", "wpb": "127.6", "bsz": "8", "num_updates": "1400", "lr": "7.43e-05", "gnorm": "3.428", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "2311"}
	[2024-06-20 18:19:32,102][train_inner][INFO] - {"epoch": 1, "update": 0.106, "loss": "5.738", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "32.5", "wer_total": "127.41", "n_error": "94.6", "ppl": "53.37", "accuracy": "25.508", "wer": "74.248", "wps": "77.3", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "1600", "lr": "8.42e-05", "gnorm": "3.179", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "2641"}
	[2024-06-20 18:25:02,014][train_inner][INFO] - {"epoch": 1, "update": 0.119, "loss": "5.775", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "32.16", "wer_total": "126.56", "n_error": "94.145", "ppl": "54.77", "accuracy": "25.411", "wer": "74.388", "wps": "76.7", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "1800", "lr": "9.41e-05", "gnorm": "2.973", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "2971"}
	[2024-06-20 18:30:31,759][train_inner][INFO] - {"epoch": 1, "update": 0.133, "loss": "5.675", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "33.405", "wer_total": "126.875", "n_error": "93.245", "ppl": "51.1", "accuracy": "26.329", "wer": "73.494", "wps": "77", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "2000", "lr": "0.000104", "gnorm": "2.853", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "3300"}
	[2024-06-20 18:36:01,420][train_inner][INFO] - {"epoch": 1, "update": 0.146, "loss": "5.588", "ntokens": "128.2", "acc_total": "128.2", "n_correct": "35.09", "wer_total": "128.2", "n_error": "92.79", "ppl": "48.12", "accuracy": "27.371", "wer": "72.379", "wps": "77.8", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "2200", "lr": "0.0001139", "gnorm": "2.819", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "3630"}
	[2024-06-20 18:41:30,537][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "5.478", "ntokens": "127.775", "acc_total": "127.775", "n_correct": "37.185", "wer_total": "127.775", "n_error": "90.35", "ppl": "44.55", "accuracy": "29.102", "wer": "70.71", "wps": "77.6", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "2400", "lr": "0.0001238", "gnorm": "2.968", "loss_scale": "256", "train_wall": "328", "gb_free": "7.1", "wall": "3959"}
	[2024-06-20 18:44:15,157][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 19:26:09,065][valid][INFO] - {"epoch": 1, "valid_loss": "5.219", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "5.80013", "valid_wer_total": "18.1585", "valid_n_error": "12.334", "valid_ppl": "37.24", "valid_accuracy": "31.942", "valid_wer": "67.924", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "2500"}
	[2024-06-20 19:26:09,066][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 2500 updates
	[2024-06-20 19:26:09,066][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_2500.pt
	[2024-06-20 19:26:12,255][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_2500.pt
	[2024-06-20 19:26:15,353][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_2500.pt (epoch 1 @ 2500 updates, score 31.942) (writing took 6.287417058949359 seconds)
	[2024-06-20 19:28:59,551][train_inner][INFO] - {"epoch": 1, "update": 0.172, "loss": "5.282", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "39.975", "wer_total": "126.92", "n_error": "86.69", "ppl": "38.9", "accuracy": "31.496", "wer": "68.303", "wps": "8.9", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "2600", "lr": "0.0001337", "gnorm": "3.353", "loss_scale": "256", "train_wall": "328", "gb_free": "7.1", "wall": "6808"}
	[2024-06-20 19:34:28,732][train_inner][INFO] - {"epoch": 1, "update": 0.186, "loss": "5.139", "ntokens": "125.685", "acc_total": "125.685", "n_correct": "42.14", "wer_total": "125.685", "n_error": "83.3", "ppl": "35.23", "accuracy": "33.528", "wer": "66.277", "wps": "76.4", "ups": "0.61", "wpb": "125.7", "bsz": "8", "num_updates": "2800", "lr": "0.0001436", "gnorm": "3.764", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "7137"}
	[2024-06-20 19:39:57,912][train_inner][INFO] - {"epoch": 1, "update": 0.199, "loss": "4.893", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "46.775", "wer_total": "127.19", "n_error": "80.19", "ppl": "29.71", "accuracy": "36.776", "wer": "63.047", "wps": "77.3", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "3000", "lr": "0.0001535", "gnorm": "4.034", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "7466"}
	[2024-06-20 19:45:27,193][train_inner][INFO] - {"epoch": 1, "update": 0.212, "loss": "4.618", "ntokens": "126.535", "acc_total": "126.535", "n_correct": "50.365", "wer_total": "126.535", "n_error": "75.93", "ppl": "24.56", "accuracy": "39.803", "wer": "60.007", "wps": "76.9", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "3200", "lr": "0.0001634", "gnorm": "4.231", "loss_scale": "256", "train_wall": "329", "gb_free": "7.1", "wall": "7796"}
	[2024-06-20 19:50:56,370][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "4.447", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "53.015", "wer_total": "126.53", "n_error": "73.32", "ppl": "21.81", "accuracy": "41.899", "wer": "57.947", "wps": "76.9", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "3400", "lr": "0.0001733", "gnorm": "4.353", "loss_scale": "256", "train_wall": "328", "gb_free": "7.1", "wall": "8125"}
	[2024-06-20 19:56:25,300][train_inner][INFO] - {"epoch": 1, "update": 0.239, "loss": "4.22", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "56.025", "wer_total": "127.025", "n_error": "70.845", "ppl": "18.64", "accuracy": "44.105", "wer": "55.772", "wps": "77.2", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "3600", "lr": "0.0001832", "gnorm": "4.386", "loss_scale": "256", "train_wall": "328", "gb_free": "7.1", "wall": "8454"}
	[2024-06-20 20:01:54,406][train_inner][INFO] - {"epoch": 1, "update": 0.252, "loss": "4.064", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "57.84", "wer_total": "127.35", "n_error": "69.345", "ppl": "16.72", "accuracy": "45.418", "wer": "54.452", "wps": "77.4", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "3800", "lr": "0.0001931", "gnorm": "4.41", "loss_scale": "256", "train_wall": "328", "gb_free": "7.1", "wall": "8783"}
	[2024-06-20 20:07:23,449][train_inner][INFO] - {"epoch": 1, "update": 0.265, "loss": "3.931", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "59.76", "wer_total": "127.785", "n_error": "67.845", "ppl": "15.25", "accuracy": "46.766", "wer": "53.093", "wps": "77.7", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "4000", "lr": "0.000203", "gnorm": "4.447", "loss_scale": "256", "train_wall": "328", "gb_free": "7.1", "wall": "9112"}
	[2024-06-20 20:12:52,669][train_inner][INFO] - {"epoch": 1, "update": 0.278, "loss": "3.778", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "60.965", "wer_total": "126.255", "n_error": "65.135", "ppl": "13.72", "accuracy": "48.287", "wer": "51.59", "wps": "76.7", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "4200", "lr": "0.0002129", "gnorm": "4.481", "loss_scale": "512", "train_wall": "329", "gb_free": "7.1", "wall": "9441"}
	[2024-06-20 20:18:21,686][train_inner][INFO] - {"epoch": 1, "update": 0.292, "loss": "3.668", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "62.45", "wer_total": "125.9", "n_error": "63.34", "ppl": "12.71", "accuracy": "49.603", "wer": "50.31", "wps": "76.5", "ups": "0.61", "wpb": "125.9", "bsz": "8", "num_updates": "4400", "lr": "0.0002228", "gnorm": "4.467", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "9770"}
	[2024-06-20 20:23:50,843][train_inner][INFO] - {"epoch": 1, "update": 0.305, "loss": "3.515", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "65.1", "wer_total": "127.885", "n_error": "62.64", "ppl": "11.43", "accuracy": "50.905", "wer": "48.982", "wps": "77.7", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "4600", "lr": "0.0002327", "gnorm": "4.368", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "10099"}
	[2024-06-20 20:29:19,929][train_inner][INFO] - {"epoch": 1, "update": 0.318, "loss": "3.445", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "65.11", "wer_total": "126.21", "n_error": "61.025", "ppl": "10.89", "accuracy": "51.589", "wer": "48.352", "wps": "76.7", "ups": "0.61", "wpb": "126.2", "bsz": "8", "num_updates": "4800", "lr": "0.0002426", "gnorm": "4.351", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "10428"}
	[2024-06-20 20:34:48,980][train_inner][INFO] - {"epoch": 1, "update": 0.331, "loss": "3.329", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "66.735", "wer_total": "126.87", "n_error": "60.015", "ppl": "10.05", "accuracy": "52.601", "wer": "47.304", "wps": "77.1", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "5000", "lr": "0.0002525", "gnorm": "4.284", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "10757"}
	[2024-06-20 20:34:48,981][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 21:16:44,575][valid][INFO] - {"epoch": 1, "valid_loss": "2.982", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "10.2962", "valid_wer_total": "18.1585", "valid_n_error": "7.83831", "valid_ppl": "7.9", "valid_accuracy": "56.702", "valid_wer": "43.166", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "5000", "valid_best_accuracy": "56.702"}
	[2024-06-20 21:16:44,575][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 5000 updates
	[2024-06-20 21:16:44,576][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_5000.pt
	[2024-06-20 21:16:47,781][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_5000.pt
	[2024-06-20 21:16:52,182][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_5000.pt (epoch 1 @ 5000 updates, score 56.702) (writing took 7.60618672799319 seconds)
	[2024-06-20 21:22:20,554][train_inner][INFO] - {"epoch": 1, "update": 0.345, "loss": "3.14", "ntokens": "126.51", "acc_total": "126.51", "n_correct": "69.245", "wer_total": "126.51", "n_error": "57.14", "ppl": "8.81", "accuracy": "54.735", "wer": "45.166", "wps": "8.9", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "5200", "lr": "0.0002624", "gnorm": "4.225", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "13609"}
	[2024-06-20 21:27:49,420][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "3.164", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "68.965", "wer_total": "127.425", "n_error": "58.375", "ppl": "8.96", "accuracy": "54.122", "wer": "45.811", "wps": "77.5", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "5400", "lr": "0.0002723", "gnorm": "4.275", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "13938"}
	[2024-06-20 21:33:18,389][train_inner][INFO] - {"epoch": 1, "update": 0.371, "loss": "3.052", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "70.345", "wer_total": "127.52", "n_error": "57", "ppl": "8.29", "accuracy": "55.164", "wer": "44.699", "wps": "77.5", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "5600", "lr": "0.0002822", "gnorm": "4.178", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "14267"}
	[2024-06-20 21:38:47,285][train_inner][INFO] - {"epoch": 1, "update": 0.384, "loss": "3.121", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "69.735", "wer_total": "126.59", "n_error": "56.73", "ppl": "8.7", "accuracy": "55.087", "wer": "44.814", "wps": "77", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "5800", "lr": "0.0002921", "gnorm": "4.163", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "14596"}
	[2024-06-20 21:44:16,036][train_inner][INFO] - {"epoch": 1, "update": 0.398, "loss": "2.993", "ntokens": "127.395", "acc_total": "127.395", "n_correct": "71.345", "wer_total": "127.395", "n_error": "55.89", "ppl": "7.96", "accuracy": "56.003", "wer": "43.871", "wps": "77.5", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "6000", "lr": "0.000302", "gnorm": "4.11", "loss_scale": "512", "train_wall": "328", "gb_free": "7.1", "wall": "14925"}
	[2024-06-20 21:49:44,832][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "2.892", "ntokens": "126.995", "acc_total": "126.995", "n_correct": "72.55", "wer_total": "126.995", "n_error": "54.33", "ppl": "7.42", "accuracy": "57.128", "wer": "42.781", "wps": "77.2", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "6200", "lr": "0.0003119", "gnorm": "4.026", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "15253"}
	[2024-06-20 21:55:13,623][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "2.957", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "71.08", "wer_total": "126.01", "n_error": "54.815", "ppl": "7.77", "accuracy": "56.408", "wer": "43.501", "wps": "76.7", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "6400", "lr": "0.0003218", "gnorm": "4.08", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "15582"}
	[2024-06-20 22:00:42,303][train_inner][INFO] - {"epoch": 1, "update": 0.437, "loss": "2.847", "ntokens": "126.33", "acc_total": "126.33", "n_correct": "72.47", "wer_total": "126.33", "n_error": "53.675", "ppl": "7.19", "accuracy": "57.366", "wer": "42.488", "wps": "76.9", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "6600", "lr": "0.0003317", "gnorm": "4.157", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "15911"}
	[2024-06-20 22:06:11,034][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "2.79", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "73.51", "wer_total": "126.735", "n_error": "53.07", "ppl": "6.91", "accuracy": "58.003", "wer": "41.875", "wps": "77.1", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "6800", "lr": "0.0003416", "gnorm": "4.003", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "16240"}
	[2024-06-20 22:11:39,879][train_inner][INFO] - {"epoch": 1, "update": 0.464, "loss": "2.691", "ntokens": "126.98", "acc_total": "126.98", "n_correct": "75.225", "wer_total": "126.98", "n_error": "51.68", "ppl": "6.46", "accuracy": "59.242", "wer": "40.699", "wps": "77.2", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "7000", "lr": "0.0003515", "gnorm": "3.945", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "16568"}
	[2024-06-20 22:17:08,540][train_inner][INFO] - {"epoch": 1, "update": 0.477, "loss": "2.792", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "73.69", "wer_total": "126.97", "n_error": "53.13", "ppl": "6.93", "accuracy": "58.037", "wer": "41.845", "wps": "77.3", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "7200", "lr": "0.0003614", "gnorm": "4.132", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "16897"}
	[2024-06-20 22:22:37,220][train_inner][INFO] - {"epoch": 1, "update": 0.491, "loss": "2.629", "ntokens": "127.45", "acc_total": "127.45", "n_correct": "76.29", "wer_total": "127.45", "n_error": "51.08", "ppl": "6.18", "accuracy": "59.859", "wer": "40.078", "wps": "77.6", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "7400", "lr": "0.0003713", "gnorm": "3.987", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "17226"}
	[2024-06-20 22:25:21,601][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-20 23:07:13,715][valid][INFO] - {"epoch": 1, "valid_loss": "2.387", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "11.4533", "valid_wer_total": "18.1585", "valid_n_error": "6.69237", "valid_ppl": "5.23", "valid_accuracy": "63.074", "valid_wer": "36.855", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "7500", "valid_best_accuracy": "63.074"}
	[2024-06-20 23:07:13,716][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 7500 updates
	[2024-06-20 23:07:13,716][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_7500.pt
	[2024-06-20 23:07:16,940][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_7500.pt
	[2024-06-20 23:07:22,373][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_7500.pt (epoch 1 @ 7500 updates, score 63.074) (writing took 8.6578335770173 seconds)
	[2024-06-20 23:10:06,263][train_inner][INFO] - {"epoch": 1, "update": 0.504, "loss": "2.641", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "75.59", "wer_total": "127.32", "n_error": "51.635", "ppl": "6.24", "accuracy": "59.37", "wer": "40.555", "wps": "8.9", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "7600", "lr": "0.0003812", "gnorm": "4.052", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "20075"}
	[2024-06-20 23:15:35,022][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "2.629", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "75.855", "wer_total": "127.11", "n_error": "51.135", "ppl": "6.19", "accuracy": "59.677", "wer": "40.229", "wps": "77.3", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "7800", "lr": "0.0003911", "gnorm": "3.862", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "20404"}
	[2024-06-20 23:21:03,807][train_inner][INFO] - {"epoch": 1, "update": 0.53, "loss": "2.596", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "76.255", "wer_total": "126.875", "n_error": "50.505", "ppl": "6.05", "accuracy": "60.102", "wer": "39.807", "wps": "77.2", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "8000", "lr": "0.000401", "gnorm": "3.928", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "20732"}
	[2024-06-20 23:26:32,547][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "2.582", "ntokens": "126.05", "acc_total": "126.05", "n_correct": "76", "wer_total": "126.05", "n_error": "49.99", "ppl": "5.99", "accuracy": "60.294", "wer": "39.659", "wps": "76.7", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "8200", "lr": "0.0004109", "gnorm": "4.025", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "21061"}
	[2024-06-20 23:32:01,264][train_inner][INFO] - {"epoch": 1, "update": 0.557, "loss": "2.521", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "77.28", "wer_total": "126.785", "n_error": "49.43", "ppl": "5.74", "accuracy": "60.954", "wer": "38.987", "wps": "77.1", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "8400", "lr": "0.0004208", "gnorm": "3.917", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "21390"}
	[2024-06-20 23:37:29,938][train_inner][INFO] - {"epoch": 1, "update": 0.57, "loss": "2.521", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "76.74", "wer_total": "126.675", "n_error": "49.835", "ppl": "5.74", "accuracy": "60.58", "wer": "39.341", "wps": "77.1", "ups": "0.61", "wpb": "126.7", "bsz": "8", "num_updates": "8600", "lr": "0.0004307", "gnorm": "4.027", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "21718"}
	[2024-06-20 23:42:58,608][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "2.511", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "77.67", "wer_total": "127.09", "n_error": "49.33", "ppl": "5.7", "accuracy": "61.114", "wer": "38.815", "wps": "77.3", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "8800", "lr": "0.0004406", "gnorm": "3.887", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "22047"}
	[2024-06-20 23:48:27,656][train_inner][INFO] - {"epoch": 1, "update": 0.597, "loss": "2.517", "ntokens": "127.43", "acc_total": "127.43", "n_correct": "77.66", "wer_total": "127.43", "n_error": "49.665", "ppl": "5.72", "accuracy": "60.943", "wer": "38.974", "wps": "77.5", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "9000", "lr": "0.0004505", "gnorm": "3.908", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "22376"}
	[2024-06-20 23:53:56,495][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "2.494", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "78.15", "wer_total": "127.145", "n_error": "48.91", "ppl": "5.63", "accuracy": "61.465", "wer": "38.468", "wps": "77.3", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "9200", "lr": "0.0004604", "gnorm": "4.077", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "22705"}
	[2024-06-20 23:59:25,371][train_inner][INFO] - {"epoch": 1, "update": 0.623, "loss": "2.492", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "76.91", "wer_total": "125.995", "n_error": "49.01", "ppl": "5.63", "accuracy": "61.042", "wer": "38.898", "wps": "76.6", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "9400", "lr": "0.0004703", "gnorm": "4.112", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "23034"}
	[2024-06-21 00:04:54,242][train_inner][INFO] - {"epoch": 1, "update": 0.636, "loss": "2.463", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "78.015", "wer_total": "126.87", "n_error": "48.745", "ppl": "5.51", "accuracy": "61.492", "wer": "38.421", "wps": "77.2", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "9600", "lr": "0.0004802", "gnorm": "4.127", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "23363"}
	[2024-06-21 00:10:23,069][train_inner][INFO] - {"epoch": 1, "update": 0.65, "loss": "2.421", "ntokens": "125.68", "acc_total": "125.68", "n_correct": "77.795", "wer_total": "125.68", "n_error": "47.77", "ppl": "5.35", "accuracy": "61.899", "wer": "38.009", "wps": "76.4", "ups": "0.61", "wpb": "125.7", "bsz": "8", "num_updates": "9800", "lr": "0.0004901", "gnorm": "4.028", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "23692"}
	[2024-06-21 00:15:51,819][train_inner][INFO] - {"epoch": 1, "update": 0.663, "loss": "2.435", "ntokens": "127.56", "acc_total": "127.56", "n_correct": "78.59", "wer_total": "127.56", "n_error": "48.87", "ppl": "5.41", "accuracy": "61.61", "wer": "38.311", "wps": "77.6", "ups": "0.61", "wpb": "127.6", "bsz": "8", "num_updates": "10000", "lr": "0.0005", "gnorm": "4.019", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "24020"}
	[2024-06-21 00:15:51,820][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 00:57:45,610][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "11.8518", "valid_wer_total": "18.1585", "valid_n_error": "6.29141", "valid_ppl": "nan", "valid_accuracy": "65.269", "valid_wer": "34.647", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "10000", "valid_best_accuracy": "65.269"}
	[2024-06-21 00:57:45,611][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 10000 updates
	[2024-06-21 00:57:45,611][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_10000.pt
	[2024-06-21 00:57:48,844][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_10000.pt
	[2024-06-21 00:57:54,173][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_10000.pt (epoch 1 @ 10000 updates, score 65.269) (writing took 8.562231724034064 seconds)
	[2024-06-21 01:03:22,704][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "2.39", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "79.09", "wer_total": "126.605", "n_error": "47.435", "ppl": "5.24", "accuracy": "62.47", "wer": "37.467", "wps": "8.9", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "10200", "lr": "0.000485243", "gnorm": "4.115", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "26871"}
	[2024-06-21 01:06:33,320][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0
	[2024-06-21 01:08:53,119][train_inner][INFO] - {"epoch": 1, "update": 0.689, "loss": "2.373", "ntokens": "127.22", "acc_total": "127.22", "n_correct": "79.84", "wer_total": "127.22", "n_error": "47.26", "ppl": "5.18", "accuracy": "62.757", "wer": "37.148", "wps": "77", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "10400", "lr": "0.000470922", "gnorm": "3.877", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "27202"}
	[2024-06-21 01:14:21,910][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "2.372", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "80.95", "wer_total": "127.885", "n_error": "46.865", "ppl": "5.18", "accuracy": "63.299", "wer": "36.646", "wps": "77.8", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "10600", "lr": "0.000457024", "gnorm": "3.752", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "27530"}
	[2024-06-21 01:19:50,870][train_inner][INFO] - {"epoch": 1, "update": 0.716, "loss": "2.36", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "81.745", "wer_total": "127.33", "n_error": "45.525", "ppl": "5.13", "accuracy": "64.199", "wer": "35.754", "wps": "77.4", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "10800", "lr": "0.000443536", "gnorm": "4.012", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "27859"}
	[2024-06-21 01:20:59,990][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-21 01:25:21,591][train_inner][INFO] - {"epoch": 1, "update": 0.729, "loss": "2.279", "ntokens": "126.11", "acc_total": "126.11", "n_correct": "84.15", "wer_total": "126.11", "n_error": "41.905", "ppl": "4.85", "accuracy": "66.727", "wer": "33.229", "wps": "76.3", "ups": "0.6", "wpb": "126.1", "bsz": "8", "num_updates": "11000", "lr": "0.000430446", "gnorm": "3.762", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "28190"}
	[2024-06-21 01:30:50,338][train_inner][INFO] - {"epoch": 1, "update": 0.743, "loss": "2.222", "ntokens": "126.545", "acc_total": "126.545", "n_correct": "82.045", "wer_total": "126.545", "n_error": "44.405", "ppl": "4.67", "accuracy": "64.835", "wer": "35.09", "wps": "77", "ups": "0.61", "wpb": "126.5", "bsz": "8", "num_updates": "11200", "lr": "0.000417742", "gnorm": "3.846", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "28519"}
	[2024-06-21 01:36:19,188][train_inner][INFO] - {"epoch": 1, "update": 0.756, "loss": "2.216", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "81.305", "wer_total": "125.9", "n_error": "44.49", "ppl": "4.65", "accuracy": "64.579", "wer": "35.338", "wps": "76.6", "ups": "0.61", "wpb": "125.9", "bsz": "8", "num_updates": "11400", "lr": "0.000405413", "gnorm": "3.843", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "28848"}
	[2024-06-21 01:41:48,017][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "2.227", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "85.69", "wer_total": "126.905", "n_error": "41.16", "ppl": "4.68", "accuracy": "67.523", "wer": "32.434", "wps": "77.2", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "11600", "lr": "0.000393448", "gnorm": "3.843", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "29177"}
	[2024-06-21 01:47:16,746][train_inner][INFO] - {"epoch": 1, "update": 0.782, "loss": "2.152", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "85.66", "wer_total": "126.775", "n_error": "41.045", "ppl": "4.45", "accuracy": "67.569", "wer": "32.376", "wps": "77.1", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "11800", "lr": "0.000381836", "gnorm": "3.748", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "29505"}
	[2024-06-21 01:52:45,733][train_inner][INFO] - {"epoch": 1, "update": 0.796, "loss": "2.129", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "86.48", "wer_total": "126.605", "n_error": "40.07", "ppl": "4.37", "accuracy": "68.307", "wer": "31.65", "wps": "77", "ups": "0.61", "wpb": "126.6", "bsz": "8", "num_updates": "12000", "lr": "0.000370567", "gnorm": "3.691", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "29834"}
	[2024-06-21 01:58:14,556][train_inner][INFO] - {"epoch": 1, "update": 0.809, "loss": "2.136", "ntokens": "126.91", "acc_total": "126.91", "n_correct": "87.715", "wer_total": "126.91", "n_error": "39.13", "ppl": "4.4", "accuracy": "69.116", "wer": "30.833", "wps": "77.2", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "12200", "lr": "0.000359631", "gnorm": "3.813", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "30163"}
	[2024-06-21 02:03:43,365][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "2.142", "ntokens": "126.95", "acc_total": "126.95", "n_correct": "84.55", "wer_total": "126.95", "n_error": "42.285", "ppl": "4.41", "accuracy": "66.601", "wer": "33.308", "wps": "77.2", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "12400", "lr": "0.000349017", "gnorm": "3.723", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "30492"}
	[2024-06-21 02:06:27,597][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 02:48:20,966][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "12.709", "valid_wer_total": "18.1585", "valid_n_error": "5.44273", "valid_ppl": "nan", "valid_accuracy": "69.989", "valid_wer": "29.973", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "12500", "valid_best_accuracy": "69.989"}
	[2024-06-21 02:48:20,967][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12500 updates
	[2024-06-21 02:48:20,967][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12500.pt
	[2024-06-21 02:48:24,215][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12500.pt
	[2024-06-21 02:48:28,541][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12500.pt (epoch 1 @ 12500 updates, score 69.989) (writing took 7.57451191090513 seconds)
	[2024-06-21 02:51:12,459][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "2.023", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "84.465", "wer_total": "126.58", "n_error": "42.055", "ppl": "4.07", "accuracy": "66.729", "wer": "33.224", "wps": "8.9", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "12600", "lr": "0.000338716", "gnorm": "3.742", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "33341"}
	[2024-06-21 02:56:41,097][train_inner][INFO] - {"epoch": 1, "update": 0.849, "loss": "2.035", "ntokens": "127.27", "acc_total": "127.27", "n_correct": "85.895", "wer_total": "127.27", "n_error": "41.325", "ppl": "4.1", "accuracy": "67.49", "wer": "32.47", "wps": "77.5", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "12800", "lr": "0.00032872", "gnorm": "3.739", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "33670"}
	[2024-06-21 03:02:09,811][train_inner][INFO] - {"epoch": 1, "update": 0.862, "loss": "2.025", "ntokens": "125.955", "acc_total": "125.955", "n_correct": "86.805", "wer_total": "125.955", "n_error": "39.105", "ppl": "4.07", "accuracy": "68.917", "wer": "31.047", "wps": "76.6", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "13000", "lr": "0.000319018", "gnorm": "3.558", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "33998"}
	[2024-06-21 03:07:38,535][train_inner][INFO] - {"epoch": 1, "update": 0.875, "loss": "2.043", "ntokens": "126.89", "acc_total": "126.89", "n_correct": "88.28", "wer_total": "126.89", "n_error": "38.56", "ppl": "4.12", "accuracy": "69.572", "wer": "30.389", "wps": "77.2", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "13200", "lr": "0.000309603", "gnorm": "3.586", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "34327"}
	[2024-06-21 03:13:07,224][train_inner][INFO] - {"epoch": 1, "update": 0.888, "loss": "2.029", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "88.35", "wer_total": "127.49", "n_error": "39.085", "ppl": "4.08", "accuracy": "69.3", "wer": "30.657", "wps": "77.6", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "13400", "lr": "0.000300466", "gnorm": "3.389", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "34656"}
	[2024-06-21 03:14:52,275][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-21 03:18:37,403][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "1.936", "ntokens": "126.85", "acc_total": "126.85", "n_correct": "87.655", "wer_total": "126.85", "n_error": "39.13", "ppl": "3.83", "accuracy": "69.101", "wer": "30.847", "wps": "76.8", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "13600", "lr": "0.000291598", "gnorm": "3.471", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "34986"}
	[2024-06-21 03:24:06,186][train_inner][INFO] - {"epoch": 1, "update": 0.915, "loss": "1.903", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "89.575", "wer_total": "127.03", "n_error": "37.415", "ppl": "3.74", "accuracy": "70.515", "wer": "29.454", "wps": "77.3", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "13800", "lr": "0.000282992", "gnorm": "3.522", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "35315"}
	[2024-06-21 03:29:34,940][train_inner][INFO] - {"epoch": 1, "update": 0.928, "loss": "1.927", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "92.205", "wer_total": "127.19", "n_error": "34.965", "ppl": "3.8", "accuracy": "72.494", "wer": "27.49", "wps": "77.4", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "14000", "lr": "0.00027464", "gnorm": "3.559", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "35643"}
	[2024-06-21 03:35:03,737][train_inner][INFO] - {"epoch": 1, "update": 0.941, "loss": "1.913", "ntokens": "126.04", "acc_total": "126.04", "n_correct": "91.57", "wer_total": "126.04", "n_error": "34.445", "ppl": "3.77", "accuracy": "72.652", "wer": "27.329", "wps": "76.7", "ups": "0.61", "wpb": "126", "bsz": "8", "num_updates": "14200", "lr": "0.000266535", "gnorm": "3.423", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "35972"}
	[2024-06-21 03:40:32,646][train_inner][INFO] - {"epoch": 1, "update": 0.955, "loss": "1.896", "ntokens": "126.84", "acc_total": "126.84", "n_correct": "90.865", "wer_total": "126.84", "n_error": "35.935", "ppl": "3.72", "accuracy": "71.637", "wer": "28.331", "wps": "77.1", "ups": "0.61", "wpb": "126.8", "bsz": "8", "num_updates": "14400", "lr": "0.000258668", "gnorm": "3.284", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "36301"}
	[2024-06-21 03:46:01,469][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "1.855", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "94.36", "wer_total": "127.515", "n_error": "33.135", "ppl": "3.62", "accuracy": "73.999", "wer": "25.985", "wps": "77.6", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "14600", "lr": "0.000251034", "gnorm": "3.356", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "36630"}
	[2024-06-21 03:51:30,354][train_inner][INFO] - {"epoch": 1, "update": 0.981, "loss": "1.877", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "92.39", "wer_total": "127.235", "n_error": "34.825", "ppl": "3.67", "accuracy": "72.614", "wer": "27.371", "wps": "77.4", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "14800", "lr": "0.000243626", "gnorm": "3.203", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "36959"}
	[2024-06-21 03:56:59,067][train_inner][INFO] - {"epoch": 1, "update": 0.994, "loss": "1.863", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "91.565", "wer_total": "127.245", "n_error": "35.65", "ppl": "3.64", "accuracy": "71.96", "wer": "28.017", "wps": "77.4", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "15000", "lr": "0.000236435", "gnorm": "3.276", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "37288"}
	[2024-06-21 03:56:59,067][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 04:38:51,389][valid][INFO] - {"epoch": 1, "valid_loss": "1.597", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.6125", "valid_wer_total": "18.1585", "valid_n_error": "4.54048", "valid_ppl": "3.03", "valid_accuracy": "74.965", "valid_wer": "25.005", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "74.965"}
	[2024-06-21 04:38:51,390][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15000 updates
	[2024-06-21 04:38:51,390][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_15000.pt
	[2024-06-21 04:38:54,679][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_15000.pt
	[2024-06-21 04:38:59,294][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_15000.pt (epoch 1 @ 15000 updates, score 74.965) (writing took 7.9041474119294435 seconds)
	[2024-06-21 04:41:14,790][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 05:23:06,306][valid][INFO] - {"epoch": 1, "valid_loss": "1.593", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.349", "valid_wer_total": "18.1585", "valid_n_error": "4.80488", "valid_ppl": "3.02", "valid_accuracy": "73.514", "valid_wer": "26.461", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15083", "valid_best_accuracy": "74.965"}
	[2024-06-21 05:23:06,307][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15083 updates
	[2024-06-21 05:23:06,307][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_last.pt
	[2024-06-21 05:23:10,344][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_last.pt
	[2024-06-21 05:23:10,433][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 1 @ 15083 updates, score 73.514) (writing took 4.125772170023993 seconds)
	[2024-06-21 05:23:10,433][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below)
	[2024-06-21 05:23:10,446][train][INFO] - {"epoch": 1, "train_loss": "3.273", "train_ntokens": "126.896", "train_acc_total": "126.896", "train_n_correct": "68.4464", "train_wer_total": "126.896", "train_n_error": "58.3257", "train_ppl": "9.67", "train_accuracy": "53.939", "train_wer": "45.963", "train_wps": "45.1", "train_ups": "0.36", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "15083", "train_lr": "0.000233514", "train_gnorm": "3.909", "train_loss_scale": "1024", "train_train_wall": "24760", "train_gb_free": "7.1", "train_wall": "42459"}
	[2024-06-21 05:23:10,504][fairseq.trainer][INFO] - begin training epoch 2
	[2024-06-21 05:23:10,504][fairseq_cli.train][INFO] - Start iterating over samples
	[2024-06-21 05:26:22,501][train_inner][INFO] - {"epoch": 2, "update": 1.008, "loss": "1.75", "ntokens": "127.23", "acc_total": "127.23", "n_correct": "93.09", "wer_total": "127.23", "n_error": "34.12", "ppl": "3.36", "accuracy": "73.167", "wer": "26.818", "wps": "4.7", "ups": "0.04", "wpb": "127.2", "bsz": "8", "num_updates": "15200", "lr": "0.000229457", "gnorm": "3.209", "loss_scale": "1024", "train_wall": "327", "gb_free": "7.1", "wall": "42651"}
	[2024-06-21 05:31:51,038][train_inner][INFO] - {"epoch": 2, "update": 1.021, "loss": "1.692", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "91.045", "wer_total": "126.325", "n_error": "35.23", "ppl": "3.23", "accuracy": "72.072", "wer": "27.888", "wps": "76.9", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "15400", "lr": "0.000222685", "gnorm": "3.158", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "42980"}
	[2024-06-21 05:37:19,654][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "1.679", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "92.53", "wer_total": "126.915", "n_error": "34.365", "ppl": "3.2", "accuracy": "72.907", "wer": "27.077", "wps": "77.2", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "15600", "lr": "0.000216113", "gnorm": "3.3", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "43308"}
	[2024-06-21 05:42:48,064][train_inner][INFO] - {"epoch": 2, "update": 1.048, "loss": "1.758", "ntokens": "125.545", "acc_total": "125.545", "n_correct": "91.325", "wer_total": "125.545", "n_error": "34.185", "ppl": "3.38", "accuracy": "72.743", "wer": "27.229", "wps": "76.5", "ups": "0.61", "wpb": "125.5", "bsz": "8", "num_updates": "15800", "lr": "0.000209735", "gnorm": "3.178", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "43637"}
	[2024-06-21 05:48:16,776][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "1.724", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "93.35", "wer_total": "127.405", "n_error": "33.99", "ppl": "3.3", "accuracy": "73.27", "wer": "26.679", "wps": "77.5", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "16000", "lr": "0.000203545", "gnorm": "3.228", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "43965"}
	[2024-06-21 05:53:45,525][train_inner][INFO] - {"epoch": 2, "update": 1.074, "loss": "1.584", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "96.21", "wer_total": "126.915", "n_error": "30.685", "ppl": "3", "accuracy": "75.807", "wer": "24.178", "wps": "77.2", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "16200", "lr": "0.000197538", "gnorm": "3.131", "loss_scale": "2048", "train_wall": "328", "gb_free": "7.1", "wall": "44294"}
	[2024-06-21 05:56:59,414][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-21 05:59:15,701][train_inner][INFO] - {"epoch": 2, "update": 1.087, "loss": "1.626", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "95.765", "wer_total": "127.2", "n_error": "31.42", "ppl": "3.09", "accuracy": "75.287", "wer": "24.701", "wps": "77.1", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "16400", "lr": "0.000191708", "gnorm": "3.114", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "44624"}
	[2024-06-21 06:04:44,248][train_inner][INFO] - {"epoch": 2, "update": 1.101, "loss": "1.618", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "96.195", "wer_total": "127.485", "n_error": "31.27", "ppl": "3.07", "accuracy": "75.456", "wer": "24.528", "wps": "77.6", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "16600", "lr": "0.00018605", "gnorm": "3.123", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "44953"}
	[2024-06-21 06:10:12,761][train_inner][INFO] - {"epoch": 2, "update": 1.114, "loss": "1.67", "ntokens": "126.41", "acc_total": "126.41", "n_correct": "95.97", "wer_total": "126.41", "n_error": "30.425", "ppl": "3.18", "accuracy": "75.92", "wer": "24.069", "wps": "77", "ups": "0.61", "wpb": "126.4", "bsz": "8", "num_updates": "16800", "lr": "0.000180559", "gnorm": "3.196", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "45281"}
	[2024-06-21 06:15:41,358][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "1.578", "ntokens": "127.825", "acc_total": "127.825", "n_correct": "98.385", "wer_total": "127.825", "n_error": "29.42", "ppl": "2.99", "accuracy": "76.969", "wer": "23.016", "wps": "77.8", "ups": "0.61", "wpb": "127.8", "bsz": "8", "num_updates": "17000", "lr": "0.00017523", "gnorm": "2.981", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "45610"}
	[2024-06-21 06:21:09,782][train_inner][INFO] - {"epoch": 2, "update": 1.14, "loss": "1.621", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "94.505", "wer_total": "126.18", "n_error": "31.64", "ppl": "3.08", "accuracy": "74.897", "wer": "25.075", "wps": "76.8", "ups": "0.61", "wpb": "126.2", "bsz": "8", "num_updates": "17200", "lr": "0.000170059", "gnorm": "3.092", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "45938"}
	[2024-06-21 06:26:38,320][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "1.62", "ntokens": "127.295", "acc_total": "127.295", "n_correct": "93.99", "wer_total": "127.295", "n_error": "33.27", "ppl": "3.07", "accuracy": "73.836", "wer": "26.136", "wps": "77.5", "ups": "0.61", "wpb": "127.3", "bsz": "8", "num_updates": "17400", "lr": "0.00016504", "gnorm": "3.124", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "46267"}
	[2024-06-21 06:29:22,706][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 07:11:15,365][valid][INFO] - {"epoch": 2, "valid_loss": "1.482", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.1336", "valid_wer_total": "18.1585", "valid_n_error": "4.0208", "valid_ppl": "2.79", "valid_accuracy": "77.835", "valid_wer": "22.143", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "17500", "valid_best_accuracy": "77.835"}
	[2024-06-21 07:11:15,365][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 17500 updates
	[2024-06-21 07:11:15,366][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_17500.pt
	[2024-06-21 07:11:18,553][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_17500.pt
	[2024-06-21 07:11:23,113][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_17500.pt (epoch 2 @ 17500 updates, score 77.835) (writing took 7.747196084004827 seconds)
	[2024-06-21 07:14:07,013][train_inner][INFO] - {"epoch": 2, "update": 1.167, "loss": "1.626", "ntokens": "126.19", "acc_total": "126.19", "n_correct": "95.05", "wer_total": "126.19", "n_error": "31.12", "ppl": "3.09", "accuracy": "75.323", "wer": "24.661", "wps": "8.9", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "17600", "lr": "0.000160169", "gnorm": "3.163", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "49116"}
	[2024-06-21 07:19:35,984][train_inner][INFO] - {"epoch": 2, "update": 1.18, "loss": "1.507", "ntokens": "128.19", "acc_total": "128.19", "n_correct": "99.265", "wer_total": "128.19", "n_error": "28.92", "ppl": "2.84", "accuracy": "77.436", "wer": "22.56", "wps": "77.9", "ups": "0.61", "wpb": "128.2", "bsz": "8", "num_updates": "17800", "lr": "0.000155442", "gnorm": "2.937", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "49444"}
	[2024-06-21 07:25:04,875][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "1.593", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "96.25", "wer_total": "126.265", "n_error": "30.005", "ppl": "3.02", "accuracy": "76.229", "wer": "23.764", "wps": "76.8", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "18000", "lr": "0.000150854", "gnorm": "2.986", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "49773"}
	[2024-06-21 07:31:10,061][train_inner][INFO] - {"epoch": 2, "update": 1.207, "loss": "1.646", "ntokens": "127.715", "acc_total": "127.715", "n_correct": "97.445", "wer_total": "127.715", "n_error": "30.235", "ppl": "3.13", "accuracy": "76.299", "wer": "23.674", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "18200", "lr": "0.000146402", "gnorm": "9.962", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "50139"}
	[2024-06-21 07:37:14,896][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "1.567", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "96.45", "wer_total": "126.27", "n_error": "29.785", "ppl": "2.96", "accuracy": "76.384", "wer": "23.588", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "18400", "lr": "0.000142081", "gnorm": "9.743", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "50503"}
	[2024-06-21 07:43:19,779][train_inner][INFO] - {"epoch": 2, "update": 1.233, "loss": "1.578", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "97.44", "wer_total": "126.87", "n_error": "29.425", "ppl": "2.99", "accuracy": "76.803", "wer": "23.193", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "18600", "lr": "0.000137888", "gnorm": "9.008", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "50868"}
	[2024-06-21 07:43:47,028][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-21 07:49:26,208][train_inner][INFO] - {"epoch": 2, "update": 1.247, "loss": "1.487", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "99.3", "wer_total": "127.185", "n_error": "27.87", "ppl": "2.8", "accuracy": "78.075", "wer": "21.913", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "18800", "lr": "0.000133819", "gnorm": "8.907", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "51235"}
	[2024-06-21 07:55:31,049][train_inner][INFO] - {"epoch": 2, "update": 1.26, "loss": "1.47", "ntokens": "127.745", "acc_total": "127.745", "n_correct": "100.015", "wer_total": "127.745", "n_error": "27.705", "ppl": "2.77", "accuracy": "78.293", "wer": "21.688", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "19000", "lr": "0.000129869", "gnorm": "8.572", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "51600"}
	[2024-06-21 08:01:36,126][train_inner][INFO] - {"epoch": 2, "update": 1.273, "loss": "1.531", "ntokens": "127.085", "acc_total": "127.085", "n_correct": "98.84", "wer_total": "127.085", "n_error": "28.24", "ppl": "2.89", "accuracy": "77.775", "wer": "22.221", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "19200", "lr": "0.000126036", "gnorm": "8.994", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "51965"}
	[2024-06-21 08:07:41,152][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "1.39", "ntokens": "125.85", "acc_total": "125.85", "n_correct": "100.84", "wer_total": "125.85", "n_error": "24.995", "ppl": "2.62", "accuracy": "80.127", "wer": "19.861", "wps": "69", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "19400", "lr": "0.000122317", "gnorm": "8.248", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "52330"}
	[2024-06-21 08:13:46,106][train_inner][INFO] - {"epoch": 2, "update": 1.3, "loss": "1.419", "ntokens": "125.665", "acc_total": "125.665", "n_correct": "99.68", "wer_total": "125.665", "n_error": "25.97", "ppl": "2.67", "accuracy": "79.322", "wer": "20.666", "wps": "68.9", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "19600", "lr": "0.000118707", "gnorm": "8.336", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "52695"}
	[2024-06-21 08:19:50,800][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "1.394", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "101.995", "wer_total": "127.615", "n_error": "25.615", "ppl": "2.63", "accuracy": "79.924", "wer": "20.072", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "19800", "lr": "0.000115203", "gnorm": "7.777", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "53059"}
	[2024-06-21 08:25:55,520][train_inner][INFO] - {"epoch": 2, "update": 1.326, "loss": "1.428", "ntokens": "126.165", "acc_total": "126.165", "n_correct": "100.6", "wer_total": "126.165", "n_error": "25.555", "ppl": "2.69", "accuracy": "79.737", "wer": "20.255", "wps": "69.2", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "20000", "lr": "0.000111803", "gnorm": "8.115", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "53424"}
	[2024-06-21 08:25:55,521][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 09:07:51,912][valid][INFO] - {"epoch": 2, "valid_loss": "1.293", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.7418", "valid_wer_total": "18.1585", "valid_n_error": "3.41401", "valid_ppl": "2.45", "valid_accuracy": "81.184", "valid_wer": "18.801", "valid_wps": "173.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "20000", "valid_best_accuracy": "81.184"}
	[2024-06-21 09:07:51,912][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 20000 updates
	[2024-06-21 09:07:51,913][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_20000.pt
	[2024-06-21 09:07:55,123][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_20000.pt
	[2024-06-21 09:07:59,653][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_20000.pt (epoch 2 @ 20000 updates, score 81.184) (writing took 7.740340469055809 seconds)
	[2024-06-21 09:14:03,967][train_inner][INFO] - {"epoch": 2, "update": 1.339, "loss": "1.406", "ntokens": "126.37", "acc_total": "126.37", "n_correct": "99.895", "wer_total": "126.37", "n_error": "26.46", "ppl": "2.65", "accuracy": "79.05", "wer": "20.939", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "20200", "lr": "0.000108504", "gnorm": "8.15", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "56312"}
	[2024-06-21 09:20:08,778][train_inner][INFO] - {"epoch": 2, "update": 1.353, "loss": "1.41", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "101.73", "wer_total": "127.095", "n_error": "25.355", "ppl": "2.66", "accuracy": "80.042", "wer": "19.95", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "20400", "lr": "0.000105301", "gnorm": "8.219", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "56677"}
	[2024-06-21 09:26:13,191][train_inner][INFO] - {"epoch": 2, "update": 1.366, "loss": "1.371", "ntokens": "126.5", "acc_total": "126.5", "n_correct": "102.005", "wer_total": "126.5", "n_error": "24.48", "ppl": "2.59", "accuracy": "80.636", "wer": "19.352", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "20600", "lr": "0.000102194", "gnorm": "7.826", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "57042"}
	[2024-06-21 09:30:35,736][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-21 09:32:19,700][train_inner][INFO] - {"epoch": 2, "update": 1.379, "loss": "1.351", "ntokens": "127.28", "acc_total": "127.28", "n_correct": "102.975", "wer_total": "127.28", "n_error": "24.28", "ppl": "2.55", "accuracy": "80.904", "wer": "19.076", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "20800", "lr": "9.91776e-05", "gnorm": "7.678", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "57408"}
	[2024-06-21 09:38:24,284][train_inner][INFO] - {"epoch": 2, "update": 1.392, "loss": "1.34", "ntokens": "127.535", "acc_total": "127.535", "n_correct": "103.225", "wer_total": "127.535", "n_error": "24.28", "ppl": "2.53", "accuracy": "80.939", "wer": "19.038", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "21000", "lr": "9.62506e-05", "gnorm": "7.758", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "57773"}
	[2024-06-21 09:44:28,907][train_inner][INFO] - {"epoch": 2, "update": 1.406, "loss": "1.327", "ntokens": "126.615", "acc_total": "126.615", "n_correct": "102.32", "wer_total": "126.615", "n_error": "24.295", "ppl": "2.51", "accuracy": "80.812", "wer": "19.188", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "21200", "lr": "9.341e-05", "gnorm": "7.694", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "58137"}
	[2024-06-21 09:50:33,556][train_inner][INFO] - {"epoch": 2, "update": 1.419, "loss": "1.302", "ntokens": "127.3", "acc_total": "127.3", "n_correct": "103.755", "wer_total": "127.3", "n_error": "23.52", "ppl": "2.47", "accuracy": "81.504", "wer": "18.476", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "21400", "lr": "9.06532e-05", "gnorm": "7.671", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "58502"}
	[2024-06-21 09:56:38,143][train_inner][INFO] - {"epoch": 2, "update": 1.432, "loss": "1.324", "ntokens": "126.845", "acc_total": "126.845", "n_correct": "102.74", "wer_total": "126.845", "n_error": "24.09", "ppl": "2.5", "accuracy": "80.996", "wer": "18.992", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "21600", "lr": "8.79777e-05", "gnorm": "7.807", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "58867"}
	[2024-06-21 10:02:42,779][train_inner][INFO] - {"epoch": 2, "update": 1.445, "loss": "1.269", "ntokens": "125.58", "acc_total": "125.58", "n_correct": "103.405", "wer_total": "125.58", "n_error": "22.17", "ppl": "2.41", "accuracy": "82.342", "wer": "17.654", "wps": "68.9", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "21800", "lr": "8.53812e-05", "gnorm": "7.421", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "59231"}
	[2024-06-21 10:08:47,542][train_inner][INFO] - {"epoch": 2, "update": 1.459, "loss": "1.331", "ntokens": "127.565", "acc_total": "127.565", "n_correct": "103.875", "wer_total": "127.565", "n_error": "23.68", "ppl": "2.52", "accuracy": "81.429", "wer": "18.563", "wps": "69.9", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "22000", "lr": "8.28614e-05", "gnorm": "7.507", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "59596"}
	[2024-06-21 10:14:52,333][train_inner][INFO] - {"epoch": 2, "update": 1.472, "loss": "1.248", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "103.945", "wer_total": "126.58", "n_error": "22.615", "ppl": "2.38", "accuracy": "82.118", "wer": "17.866", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "22200", "lr": "8.04159e-05", "gnorm": "7.122", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "59961"}
	[2024-06-21 10:20:57,141][train_inner][INFO] - {"epoch": 2, "update": 1.485, "loss": "1.253", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "104.4", "wer_total": "127.075", "n_error": "22.655", "ppl": "2.38", "accuracy": "82.156", "wer": "17.828", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "22400", "lr": "7.80425e-05", "gnorm": "7.263", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "60326"}
	[2024-06-21 10:23:59,607][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 11:05:54,868][valid][INFO] - {"epoch": 2, "valid_loss": "1.159", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.1217", "valid_wer_total": "18.1585", "valid_n_error": "3.03535", "valid_ppl": "2.23", "valid_accuracy": "83.276", "valid_wer": "16.716", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "22500", "valid_best_accuracy": "83.276"}
	[2024-06-21 11:05:54,869][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 22500 updates
	[2024-06-21 11:05:54,869][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_22500.pt
	[2024-06-21 11:05:58,092][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_22500.pt
	[2024-06-21 11:06:02,559][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_22500.pt (epoch 2 @ 22500 updates, score 83.276) (writing took 7.690108917071484 seconds)
	[2024-06-21 11:09:04,525][train_inner][INFO] - {"epoch": 2, "update": 1.498, "loss": "1.29", "ntokens": "126.79", "acc_total": "126.79", "n_correct": "103.84", "wer_total": "126.79", "n_error": "22.935", "ppl": "2.44", "accuracy": "81.899", "wer": "18.089", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "22600", "lr": "7.57393e-05", "gnorm": "7.523", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "63213"}
	[2024-06-21 11:15:09,287][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "1.234", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "104.455", "wer_total": "126.58", "n_error": "22.12", "ppl": "2.35", "accuracy": "82.521", "wer": "17.475", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "22800", "lr": "7.3504e-05", "gnorm": "7.095", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "63578"}
	[2024-06-21 11:21:14,088][train_inner][INFO] - {"epoch": 2, "update": 1.525, "loss": "1.287", "ntokens": "126.155", "acc_total": "126.155", "n_correct": "103.32", "wer_total": "126.155", "n_error": "22.825", "ppl": "2.44", "accuracy": "81.899", "wer": "18.093", "wps": "69.2", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "23000", "lr": "7.13346e-05", "gnorm": "7.303", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "63943"}
	[2024-06-21 11:27:18,947][train_inner][INFO] - {"epoch": 2, "update": 1.538, "loss": "1.226", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "104.945", "wer_total": "126.97", "n_error": "22.02", "ppl": "2.34", "accuracy": "82.653", "wer": "17.343", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "23200", "lr": "6.92293e-05", "gnorm": "7.03", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "64307"}
	[2024-06-21 11:33:23,768][train_inner][INFO] - {"epoch": 2, "update": 1.552, "loss": "1.17", "ntokens": "126.29", "acc_total": "126.29", "n_correct": "105.17", "wer_total": "126.29", "n_error": "21.105", "ppl": "2.25", "accuracy": "83.277", "wer": "16.712", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "23400", "lr": "6.71862e-05", "gnorm": "7.26", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "64672"}
	[2024-06-21 11:39:28,656][train_inner][INFO] - {"epoch": 2, "update": 1.565, "loss": "1.3", "ntokens": "126.595", "acc_total": "126.595", "n_correct": "103.385", "wer_total": "126.595", "n_error": "23.205", "ppl": "2.46", "accuracy": "81.666", "wer": "18.33", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "23600", "lr": "6.52033e-05", "gnorm": "7.413", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "65037"}
	[2024-06-21 11:45:33,264][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "1.243", "ntokens": "127.175", "acc_total": "127.175", "n_correct": "104.805", "wer_total": "127.175", "n_error": "22.36", "ppl": "2.37", "accuracy": "82.41", "wer": "17.582", "wps": "69.8", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "23800", "lr": "6.3279e-05", "gnorm": "7.152", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "65402"}
	[2024-06-21 11:51:38,223][train_inner][INFO] - {"epoch": 2, "update": 1.591, "loss": "1.228", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "104.82", "wer_total": "126.775", "n_error": "21.95", "ppl": "2.34", "accuracy": "82.682", "wer": "17.314", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "24000", "lr": "6.14114e-05", "gnorm": "7.143", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "65767"}
	[2024-06-21 11:57:43,296][train_inner][INFO] - {"epoch": 2, "update": 1.605, "loss": "1.215", "ntokens": "126.17", "acc_total": "126.17", "n_correct": "104.08", "wer_total": "126.17", "n_error": "22.07", "ppl": "2.32", "accuracy": "82.492", "wer": "17.492", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "24200", "lr": "5.9599e-05", "gnorm": "7.031", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "66132"}
	[2024-06-21 12:03:48,500][train_inner][INFO] - {"epoch": 2, "update": 1.618, "loss": "1.221", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "105.235", "wer_total": "127.2", "n_error": "21.95", "ppl": "2.33", "accuracy": "82.732", "wer": "17.256", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "24400", "lr": "5.784e-05", "gnorm": "7.1", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "66497"}
	[2024-06-21 12:08:05,819][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-21 12:09:55,440][train_inner][INFO] - {"epoch": 2, "update": 1.631, "loss": "1.266", "ntokens": "127.415", "acc_total": "127.415", "n_correct": "104.55", "wer_total": "127.415", "n_error": "22.86", "ppl": "2.41", "accuracy": "82.055", "wer": "17.941", "wps": "69.4", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "24600", "lr": "5.6133e-05", "gnorm": "6.938", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "66864"}
	[2024-06-21 12:16:00,822][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "1.192", "ntokens": "127.295", "acc_total": "127.295", "n_correct": "105.545", "wer_total": "127.295", "n_error": "21.74", "ppl": "2.28", "accuracy": "82.914", "wer": "17.078", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "24800", "lr": "5.44763e-05", "gnorm": "6.716", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "67229"}
	[2024-06-21 12:22:05,794][train_inner][INFO] - {"epoch": 2, "update": 1.658, "loss": "1.218", "ntokens": "125.91", "acc_total": "125.91", "n_correct": "103.755", "wer_total": "125.91", "n_error": "22.14", "ppl": "2.33", "accuracy": "82.404", "wer": "17.584", "wps": "69", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "25000", "lr": "5.28686e-05", "gnorm": "6.966", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "67594"}
	[2024-06-21 12:22:05,794][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 13:04:02,626][valid][INFO] - {"epoch": 2, "valid_loss": "1.051", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3546", "valid_wer_total": "18.1585", "valid_n_error": "2.80263", "valid_ppl": "2.07", "valid_accuracy": "84.559", "valid_wer": "15.434", "valid_wps": "173.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "25000", "valid_best_accuracy": "84.559"}
	[2024-06-21 13:04:02,627][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 25000 updates
	[2024-06-21 13:04:02,627][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_25000.pt
	[2024-06-21 13:04:05,843][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_25000.pt
	[2024-06-21 13:04:10,357][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_25000.pt (epoch 2 @ 25000 updates, score 84.559) (writing took 7.730699714971706 seconds)
	[2024-06-21 13:10:15,363][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "1.146", "ntokens": "127.435", "acc_total": "127.435", "n_correct": "105.765", "wer_total": "127.435", "n_error": "21.665", "ppl": "2.21", "accuracy": "82.995", "wer": "17.001", "wps": "8.8", "ups": "0.07", "wpb": "127.4", "bsz": "8", "num_updates": "25200", "lr": "5.13083e-05", "gnorm": "6.756", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "70484"}
	[2024-06-21 13:16:20,718][train_inner][INFO] - {"epoch": 2, "update": 1.684, "loss": "1.193", "ntokens": "125.305", "acc_total": "125.305", "n_correct": "103.615", "wer_total": "125.305", "n_error": "21.68", "ppl": "2.29", "accuracy": "82.69", "wer": "17.302", "wps": "68.6", "ups": "0.55", "wpb": "125.3", "bsz": "8", "num_updates": "25400", "lr": "4.9794e-05", "gnorm": "7.152", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "70849"}
	[2024-06-21 13:22:26,101][train_inner][INFO] - {"epoch": 2, "update": 1.697, "loss": "1.131", "ntokens": "127.065", "acc_total": "127.065", "n_correct": "105.58", "wer_total": "127.065", "n_error": "21.485", "ppl": "2.19", "accuracy": "83.091", "wer": "16.909", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25600", "lr": "4.83244e-05", "gnorm": "6.522", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "71215"}
	[2024-06-21 13:28:31,322][train_inner][INFO] - {"epoch": 2, "update": 1.711, "loss": "1.131", "ntokens": "127.065", "acc_total": "127.065", "n_correct": "105.935", "wer_total": "127.065", "n_error": "21.125", "ppl": "2.19", "accuracy": "83.371", "wer": "16.625", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25800", "lr": "4.68982e-05", "gnorm": "6.805", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "71580"}
	[2024-06-21 13:34:36,594][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "1.175", "ntokens": "126.995", "acc_total": "126.995", "n_correct": "105.075", "wer_total": "126.995", "n_error": "21.91", "ppl": "2.26", "accuracy": "82.739", "wer": "17.253", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "26000", "lr": "4.55141e-05", "gnorm": "7.044", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "71945"}
	[2024-06-21 13:40:41,822][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "1.156", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "104.54", "wer_total": "126.31", "n_error": "21.77", "ppl": "2.23", "accuracy": "82.765", "wer": "17.235", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "26200", "lr": "4.41708e-05", "gnorm": "6.992", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "72310"}
	[2024-06-21 13:46:47,244][train_inner][INFO] - {"epoch": 2, "update": 1.75, "loss": "1.155", "ntokens": "127.37", "acc_total": "127.37", "n_correct": "105.12", "wer_total": "127.37", "n_error": "22.245", "ppl": "2.23", "accuracy": "82.531", "wer": "17.465", "wps": "69.7", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "26400", "lr": "4.28672e-05", "gnorm": "6.807", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "72676"}
	[2024-06-21 13:52:52,478][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "1.096", "ntokens": "126.69", "acc_total": "126.69", "n_correct": "106.14", "wer_total": "126.69", "n_error": "20.545", "ppl": "2.14", "accuracy": "83.779", "wer": "16.217", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "26600", "lr": "4.16021e-05", "gnorm": "6.879", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "73041"}
	[2024-06-21 13:58:57,798][train_inner][INFO] - {"epoch": 2, "update": 1.777, "loss": "1.105", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "106.1", "wer_total": "126.605", "n_error": "20.505", "ppl": "2.15", "accuracy": "83.804", "wer": "16.196", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "26800", "lr": "4.03743e-05", "gnorm": "6.505", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "73406"}
	[2024-06-21 14:05:03,277][train_inner][INFO] - {"epoch": 2, "update": 1.79, "loss": "1.175", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "105.415", "wer_total": "127.595", "n_error": "22.175", "ppl": "2.26", "accuracy": "82.617", "wer": "17.379", "wps": "69.8", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "27000", "lr": "3.91827e-05", "gnorm": "6.811", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "73772"}
	[2024-06-21 14:11:08,697][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "1.162", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "106.29", "wer_total": "127.94", "n_error": "21.65", "ppl": "2.24", "accuracy": "83.078", "wer": "16.922", "wps": "70", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "27200", "lr": "3.80263e-05", "gnorm": "7.067", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "74137"}
	[2024-06-21 14:17:14,427][train_inner][INFO] - {"epoch": 2, "update": 1.817, "loss": "1.159", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "105.595", "wer_total": "126.68", "n_error": "21.075", "ppl": "2.23", "accuracy": "83.356", "wer": "16.636", "wps": "69.3", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "27400", "lr": "3.6904e-05", "gnorm": "6.882", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "74503"}
	[2024-06-21 14:20:17,228][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 15:02:18,283][valid][INFO] - {"epoch": 2, "valid_loss": "1.004", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4767", "valid_wer_total": "18.1585", "valid_n_error": "2.68066", "valid_ppl": "2.01", "valid_accuracy": "85.231", "valid_wer": "14.763", "valid_wps": "172.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "27500", "valid_best_accuracy": "85.231"}
	[2024-06-21 15:02:18,284][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 27500 updates
	[2024-06-21 15:02:18,284][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_27500.pt
	[2024-06-21 15:02:21,511][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_27500.pt
	[2024-06-21 15:02:26,031][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_27500.pt (epoch 2 @ 27500 updates, score 85.231) (writing took 7.747155406046659 seconds)
	[2024-06-21 15:05:28,413][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "1.13", "ntokens": "126.45", "acc_total": "126.45", "n_correct": "105.345", "wer_total": "126.45", "n_error": "21.09", "ppl": "2.19", "accuracy": "83.31", "wer": "16.679", "wps": "8.7", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "27600", "lr": "3.58149e-05", "gnorm": "6.818", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "77397"}
	[2024-06-21 15:11:33,876][train_inner][INFO] - {"epoch": 2, "update": 1.843, "loss": "1.144", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "106.345", "wer_total": "127.465", "n_error": "21.115", "ppl": "2.21", "accuracy": "83.431", "wer": "16.565", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "27800", "lr": "3.47579e-05", "gnorm": "6.705", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "77762"}
	[2024-06-21 15:17:39,180][train_inner][INFO] - {"epoch": 2, "update": 1.856, "loss": "1.154", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "105.275", "wer_total": "127.49", "n_error": "22.195", "ppl": "2.23", "accuracy": "82.575", "wer": "17.409", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "28000", "lr": "3.37321e-05", "gnorm": "6.752", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "78128"}
	[2024-06-21 15:23:44,448][train_inner][INFO] - {"epoch": 2, "update": 1.87, "loss": "1.118", "ntokens": "127.75", "acc_total": "127.75", "n_correct": "106.97", "wer_total": "127.75", "n_error": "20.775", "ppl": "2.17", "accuracy": "83.734", "wer": "16.262", "wps": "69.9", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "28200", "lr": "3.27365e-05", "gnorm": "6.881", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "78493"}
	[2024-06-21 15:28:21,782][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-21 15:29:51,281][train_inner][INFO] - {"epoch": 2, "update": 1.883, "loss": "1.116", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "106.785", "wer_total": "127.075", "n_error": "20.29", "ppl": "2.17", "accuracy": "84.033", "wer": "15.967", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "28400", "lr": "3.17704e-05", "gnorm": "6.819", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "78860"}
	[2024-06-21 15:35:56,281][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "1.132", "ntokens": "128.11", "acc_total": "128.11", "n_correct": "107.18", "wer_total": "128.11", "n_error": "20.925", "ppl": "2.19", "accuracy": "83.662", "wer": "16.334", "wps": "70.2", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "28600", "lr": "3.08327e-05", "gnorm": "6.767", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "79225"}
	[2024-06-21 15:42:01,496][train_inner][INFO] - {"epoch": 2, "update": 1.91, "loss": "1.118", "ntokens": "126.685", "acc_total": "126.685", "n_correct": "106.585", "wer_total": "126.685", "n_error": "20.1", "ppl": "2.17", "accuracy": "84.134", "wer": "15.866", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "28800", "lr": "2.99228e-05", "gnorm": "6.637", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "79590"}
	[2024-06-21 15:48:06,752][train_inner][INFO] - {"epoch": 2, "update": 1.923, "loss": "1.072", "ntokens": "127.77", "acc_total": "127.77", "n_correct": "108.02", "wer_total": "127.77", "n_error": "19.725", "ppl": "2.1", "accuracy": "84.543", "wer": "15.438", "wps": "70", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "29000", "lr": "2.90397e-05", "gnorm": "6.663", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "79955"}
	[2024-06-21 15:54:12,050][train_inner][INFO] - {"epoch": 2, "update": 1.936, "loss": "1.116", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "106.255", "wer_total": "126.785", "n_error": "20.52", "ppl": "2.17", "accuracy": "83.807", "wer": "16.185", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "29200", "lr": "2.81826e-05", "gnorm": "6.791", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "80321"}
	[2024-06-21 16:00:17,132][train_inner][INFO] - {"epoch": 2, "update": 1.949, "loss": "1.12", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "106.865", "wer_total": "127.185", "n_error": "20.315", "ppl": "2.17", "accuracy": "84.023", "wer": "15.973", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "29400", "lr": "2.73509e-05", "gnorm": "6.741", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "80686"}
	[2024-06-21 16:06:22,209][train_inner][INFO] - {"epoch": 2, "update": 1.963, "loss": "1.084", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "107.295", "wer_total": "127.1", "n_error": "19.795", "ppl": "2.12", "accuracy": "84.418", "wer": "15.574", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "29600", "lr": "2.65436e-05", "gnorm": "7.154", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "81051"}
	[2024-06-21 16:12:26,946][train_inner][INFO] - {"epoch": 2, "update": 1.976, "loss": "1.111", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "106.695", "wer_total": "126.785", "n_error": "20.085", "ppl": "2.16", "accuracy": "84.154", "wer": "15.842", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "29800", "lr": "2.57603e-05", "gnorm": "6.939", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "81415"}
	[2024-06-21 16:18:31,888][train_inner][INFO] - {"epoch": 2, "update": 1.989, "loss": "1.107", "ntokens": "126.96", "acc_total": "126.96", "n_correct": "106.955", "wer_total": "126.96", "n_error": "19.985", "ppl": "2.15", "accuracy": "84.243", "wer": "15.741", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "30000", "lr": "2.5e-05", "gnorm": "6.843", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "81780"}
	[2024-06-21 16:18:31,888][fairseq_cli.train][INFO] - Stopping training due to num_updates: 30000 >= max_update: 30000
	[2024-06-21 16:18:31,888][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-21 17:00:28,702][valid][INFO] - {"epoch": 2, "valid_loss": "0.961", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.6188", "valid_wer_total": "18.1585", "valid_n_error": "2.53856", "valid_ppl": "1.95", "valid_accuracy": "86.014", "valid_wer": "13.98", "valid_wps": "173.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "86.014"}
	[2024-06-21 17:00:28,702][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 30000 updates
	[2024-06-21 17:00:28,703][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_30000.pt
	[2024-06-21 17:00:31,899][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_30000.pt
	[2024-06-21 17:00:36,331][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_30000.pt (epoch 2 @ 30000 updates, score 86.014) (writing took 7.62819217890501 seconds)
	[2024-06-21 17:00:36,361][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below)
	[2024-06-21 17:00:36,363][train][INFO] - {"epoch": 2, "train_loss": "1.329", "train_ntokens": "126.91", "train_acc_total": "126.91", "train_n_correct": "102.181", "train_wer_total": "126.91", "train_n_error": "24.7148", "train_ppl": "2.51", "train_accuracy": "80.515", "train_wer": "19.474", "train_wps": "45.2", "train_ups": "0.36", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "30000", "train_lr": "2.5e-05", "train_gnorm": "6.553", "train_loss_scale": "1024", "train_train_wall": "26646", "train_gb_free": "6.5", "train_wall": "84305"}
	[2024-06-21 17:00:36,363][fairseq_cli.train][INFO] - done training in 84304.2 seconds