[2024-07-04 04:33:23,567][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 300000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 10000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video', 'audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 100000, 'hold_steps': 0, 'decay_steps': 200000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 300000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} [2024-07-04 04:33:23,569][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-04 04:33:23,569][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video', 'audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True} [2024-07-04 04:33:25,281][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-04 04:33:25,281][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False} [2024-07-04 04:33:25,284][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True} [2024-07-04 04:33:32,435][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count( (encoder): HubertEncoderWrapper( (w2v_model): AVHubertModel( (feature_extractor_audio): SubModel( (proj): Linear(in_features=104, out_features=1024, bias=True) ) (feature_extractor_video): SubModel( (resnet): ResEncoder( (frontend3D): Sequential( (0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False) (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): PReLU(num_parameters=64) (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False) ) (trunk): ResNet( (layer1): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer2): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer3): Sequential( (0): BasicBlock( (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer4): Sequential( (0): BasicBlock( (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) ) ) (proj): Linear(in_features=512, out_features=1024, bias=True) ) (post_extract_proj): Linear(in_features=2048, out_features=1024, bias=True) (dropout_input): Dropout(p=0.0, inplace=False) (dropout_features): Dropout(p=0.1, inplace=False) (encoder): TransformerEncoder( (pos_conv): Sequential( (0): Conv1d(1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16) (1): SamePad() (2): GELU(approximate='none') ) (layers): ModuleList( (0-23): 24 x TransformerSentenceEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (dropout1): Dropout(p=0.0, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) (dropout3): Dropout(p=0.0, inplace=False) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) ) (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True) (final_proj): None ) ) (decoder): PeftModelForCausalLM( (base_model): LoraModel( (model): LlamaForCausalLM( (model): LlamaModel( (embed_tokens): Embedding(46304, 2560, padding_idx=0) (layers): ModuleList( (0-31): 32 x LlamaDecoderLayer( (self_attn): LlamaSdpaAttention( (q_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (k_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (v_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False) (rotary_emb): LlamaRotaryEmbedding() ) (mlp): LlamaMLP( (gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False) (act_fn): SiLU() ) (input_layernorm): LlamaRMSNorm() (post_attention_layernorm): LlamaRMSNorm() ) ) (norm): LlamaRMSNorm() ) (lm_head): Linear(in_features=2560, out_features=46304, bias=False) ) ) ) (avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True) ) [2024-07-04 04:33:32,440][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask [2024-07-04 04:33:32,440][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count [2024-07-04 04:33:32,440][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss [2024-07-04 04:33:32,443][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424) [2024-07-04 04:33:32,446][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) [2024-07-04 04:33:32,446][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-04 04:33:32,492][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76 [2024-07-04 04:33:32,852][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full/valid.wrd is sequence label. skipped [2024-07-04 04:33:32,852][src.vsp_llm_dataset][INFO] - image transform: Compose( Normalize(mean=0.0, std=255.0) Normalize(mean=0.421, std=0.165) ) [2024-07-04 04:33:32,852][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True, [2024-07-04 04:33:32,852][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1 [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,988][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,989][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias [2024-07-04 04:33:32,990][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,991][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,992][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,993][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,994][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,995][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias [2024-07-04 04:33:32,996][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias [2024-07-04 04:33:32,997][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias [2024-07-04 04:33:32,998][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-04 04:33:32,998][fairseq.utils][INFO] - rank 0: capabilities = 8.6 ; total memory = 15.731 GB ; name = NVIDIA RTX A4000 [2024-07-04 04:33:32,998][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-04 04:33:32,998][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs) [2024-07-04 04:33:32,998][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1 [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt [2024-07-04 04:33:32,998][fairseq.trainer][INFO] - loading train data for epoch 1 [2024-07-04 04:33:32,999][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-05 00:20:46,916][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': None, 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 300000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 10000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video', 'audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 100000, 'hold_steps': 0, 'decay_steps': 200000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 300000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} [2024-07-05 00:20:46,919][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-05 00:20:46,919][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video', 'audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True} [2024-07-05 00:20:48,698][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-05 00:20:48,698][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False} [2024-07-05 00:20:48,702][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True} [2024-07-05 00:20:55,476][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count( (encoder): HubertEncoderWrapper( (w2v_model): AVHubertModel( (feature_extractor_audio): SubModel( (proj): Linear(in_features=104, out_features=1024, bias=True) ) (feature_extractor_video): SubModel( (resnet): ResEncoder( (frontend3D): Sequential( (0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False) (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): PReLU(num_parameters=64) (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False) ) (trunk): ResNet( (layer1): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer2): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer3): Sequential( (0): BasicBlock( (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer4): Sequential( (0): BasicBlock( (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) ) ) (proj): Linear(in_features=512, out_features=1024, bias=True) ) (post_extract_proj): Linear(in_features=2048, out_features=1024, bias=True) (dropout_input): Dropout(p=0.0, inplace=False) (dropout_features): Dropout(p=0.1, inplace=False) (encoder): TransformerEncoder( (pos_conv): Sequential( (0): Conv1d(1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16) (1): SamePad() (2): GELU(approximate='none') ) (layers): ModuleList( (0-23): 24 x TransformerSentenceEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (dropout1): Dropout(p=0.0, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) (dropout3): Dropout(p=0.0, inplace=False) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) ) (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True) (final_proj): None ) ) (decoder): PeftModelForCausalLM( (base_model): LoraModel( (model): LlamaForCausalLM( (model): LlamaModel( (embed_tokens): Embedding(46304, 2560, padding_idx=0) (layers): ModuleList( (0-31): 32 x LlamaDecoderLayer( (self_attn): LlamaSdpaAttention( (q_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (k_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (v_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False) (rotary_emb): LlamaRotaryEmbedding() ) (mlp): LlamaMLP( (gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False) (act_fn): SiLU() ) (input_layernorm): LlamaRMSNorm() (post_attention_layernorm): LlamaRMSNorm() ) ) (norm): LlamaRMSNorm() ) (lm_head): Linear(in_features=2560, out_features=46304, bias=False) ) ) ) (avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True) ) [2024-07-05 00:20:55,481][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask [2024-07-05 00:20:55,481][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count [2024-07-05 00:20:55,481][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss [2024-07-05 00:20:55,485][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424) [2024-07-05 00:20:55,487][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) [2024-07-05 00:20:55,488][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-05 00:20:55,525][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76 [2024-07-05 00:20:55,873][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full/valid.wrd is sequence label. skipped [2024-07-05 00:20:55,873][src.vsp_llm_dataset][INFO] - image transform: Compose( Normalize(mean=0.0, std=255.0) Normalize(mean=0.421, std=0.165) ) [2024-07-05 00:20:55,873][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True, [2024-07-05 00:20:55,873][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1 [2024-07-05 00:20:56,007][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,008][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,009][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias [2024-07-05 00:20:56,010][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,011][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,012][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,013][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,014][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:20:56,015][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias [2024-07-05 00:20:56,016][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias [2024-07-05 00:20:56,016][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias [2024-07-05 00:20:56,016][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias [2024-07-05 00:20:56,016][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias [2024-07-05 00:20:56,016][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-05 00:20:56,016][fairseq.utils][INFO] - rank 0: capabilities = 8.6 ; total memory = 15.731 GB ; name = NVIDIA RTX A4000 [2024-07-05 00:20:56,016][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-05 00:20:56,016][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs) [2024-07-05 00:20:56,016][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1 [2024-07-05 00:20:56,016][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt [2024-07-05 00:20:56,016][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt [2024-07-05 00:20:56,016][fairseq.trainer][INFO] - loading train data for epoch 1 [2024-07-05 00:20:56,016][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-05 00:21:29,390][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': None, 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 300000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 10000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video', 'audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 100000, 'hold_steps': 0, 'decay_steps': 200000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 300000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} [2024-07-05 00:21:29,393][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-05 00:21:29,393][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video', 'audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True} [2024-07-05 00:21:30,460][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-05 00:21:30,461][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False} [2024-07-05 00:21:30,464][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True} [2024-07-05 00:21:34,718][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count( (encoder): HubertEncoderWrapper( (w2v_model): AVHubertModel( (feature_extractor_audio): SubModel( (proj): Linear(in_features=104, out_features=1024, bias=True) ) (feature_extractor_video): SubModel( (resnet): ResEncoder( (frontend3D): Sequential( (0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False) (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): PReLU(num_parameters=64) (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False) ) (trunk): ResNet( (layer1): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer2): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer3): Sequential( (0): BasicBlock( (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer4): Sequential( (0): BasicBlock( (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) ) ) (proj): Linear(in_features=512, out_features=1024, bias=True) ) (post_extract_proj): Linear(in_features=2048, out_features=1024, bias=True) (dropout_input): Dropout(p=0.0, inplace=False) (dropout_features): Dropout(p=0.1, inplace=False) (encoder): TransformerEncoder( (pos_conv): Sequential( (0): Conv1d(1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16) (1): SamePad() (2): GELU(approximate='none') ) (layers): ModuleList( (0-23): 24 x TransformerSentenceEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (dropout1): Dropout(p=0.0, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) (dropout3): Dropout(p=0.0, inplace=False) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) ) (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True) (final_proj): None ) ) (decoder): PeftModelForCausalLM( (base_model): LoraModel( (model): LlamaForCausalLM( (model): LlamaModel( (embed_tokens): Embedding(46304, 2560, padding_idx=0) (layers): ModuleList( (0-31): 32 x LlamaDecoderLayer( (self_attn): LlamaSdpaAttention( (q_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (k_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (v_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False) (rotary_emb): LlamaRotaryEmbedding() ) (mlp): LlamaMLP( (gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False) (act_fn): SiLU() ) (input_layernorm): LlamaRMSNorm() (post_attention_layernorm): LlamaRMSNorm() ) ) (norm): LlamaRMSNorm() ) (lm_head): Linear(in_features=2560, out_features=46304, bias=False) ) ) ) (avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True) ) [2024-07-05 00:21:34,724][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask [2024-07-05 00:21:34,724][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count [2024-07-05 00:21:34,724][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss [2024-07-05 00:21:34,727][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424) [2024-07-05 00:21:34,729][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) [2024-07-05 00:21:34,730][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-05 00:21:34,768][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76 [2024-07-05 00:21:35,111][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full/valid.wrd is sequence label. skipped [2024-07-05 00:21:35,111][src.vsp_llm_dataset][INFO] - image transform: Compose( Normalize(mean=0.0, std=255.0) Normalize(mean=0.421, std=0.165) ) [2024-07-05 00:21:35,111][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True, [2024-07-05 00:21:35,111][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1 [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,286][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias [2024-07-05 00:21:35,287][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,288][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,289][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,290][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias [2024-07-05 00:21:35,291][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias [2024-07-05 00:21:35,292][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias [2024-07-05 00:21:35,293][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias [2024-07-05 00:21:35,294][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-05 00:21:35,294][fairseq.utils][INFO] - rank 0: capabilities = 8.6 ; total memory = 15.731 GB ; name = NVIDIA RTX A4000 [2024-07-05 00:21:35,294][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-05 00:21:35,294][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs) [2024-07-05 00:21:35,294][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1 [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt [2024-07-05 00:21:35,294][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt [2024-07-05 00:21:35,295][fairseq.trainer][INFO] - loading train data for epoch 1 [2024-07-05 00:21:35,295][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-06 00:44:06,830][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 300000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 2500, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 10000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video', 'audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 100000, 'hold_steps': 0, 'decay_steps': 200000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 300000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} [2024-07-06 00:44:06,833][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-06 00:44:06,833][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video', 'audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True} [2024-07-06 00:44:08,547][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v2.0 [2024-07-06 00:44:08,547][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False} [2024-07-06 00:44:08,551][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True} [2024-07-06 00:44:15,346][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count( (encoder): HubertEncoderWrapper( (w2v_model): AVHubertModel( (feature_extractor_audio): SubModel( (proj): Linear(in_features=104, out_features=1024, bias=True) ) (feature_extractor_video): SubModel( (resnet): ResEncoder( (frontend3D): Sequential( (0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False) (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): PReLU(num_parameters=64) (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False) ) (trunk): ResNet( (layer1): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=64) (relu2): PReLU(num_parameters=64) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer2): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=128) (relu2): PReLU(num_parameters=128) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer3): Sequential( (0): BasicBlock( (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=256) (relu2): PReLU(num_parameters=256) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer4): Sequential( (0): BasicBlock( (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): PReLU(num_parameters=512) (relu2): PReLU(num_parameters=512) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) ) ) (proj): Linear(in_features=512, out_features=1024, bias=True) ) (post_extract_proj): Linear(in_features=2048, out_features=1024, bias=True) (dropout_input): Dropout(p=0.0, inplace=False) (dropout_features): Dropout(p=0.1, inplace=False) (encoder): TransformerEncoder( (pos_conv): Sequential( (0): Conv1d(1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16) (1): SamePad() (2): GELU(approximate='none') ) (layers): ModuleList( (0-23): 24 x TransformerSentenceEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (dropout1): Dropout(p=0.0, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) (dropout3): Dropout(p=0.0, inplace=False) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) ) (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True) (final_proj): None ) ) (decoder): PeftModelForCausalLM( (base_model): LoraModel( (model): LlamaForCausalLM( (model): LlamaModel( (embed_tokens): Embedding(46304, 2560, padding_idx=0) (layers): ModuleList( (0-31): 32 x LlamaDecoderLayer( (self_attn): LlamaSdpaAttention( (q_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (k_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (v_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=2560, out_features=16, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=16, out_features=2560, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False) (rotary_emb): LlamaRotaryEmbedding() ) (mlp): LlamaMLP( (gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False) (down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False) (act_fn): SiLU() ) (input_layernorm): LlamaRMSNorm() (post_attention_layernorm): LlamaRMSNorm() ) ) (norm): LlamaRMSNorm() ) (lm_head): Linear(in_features=2560, out_features=46304, bias=False) ) ) ) (avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True) ) [2024-07-06 00:44:15,351][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask [2024-07-06 00:44:15,352][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count [2024-07-06 00:44:15,352][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss [2024-07-06 00:44:15,355][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424) [2024-07-06 00:44:15,357][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) [2024-07-06 00:44:15,358][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-06 00:44:15,395][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76 [2024-07-06 00:44:15,792][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full/valid.wrd is sequence label. skipped [2024-07-06 00:44:15,792][src.vsp_llm_dataset][INFO] - image transform: Compose( Normalize(mean=0.0, std=255.0) Normalize(mean=0.421, std=0.165) ) [2024-07-06 00:44:15,792][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True, [2024-07-06 00:44:15,792][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1 [2024-07-06 00:44:15,934][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,935][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,936][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,937][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,938][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias [2024-07-06 00:44:15,939][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias [2024-07-06 00:44:15,940][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias [2024-07-06 00:44:15,941][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias [2024-07-06 00:44:15,942][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias [2024-07-06 00:44:15,943][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-06 00:44:15,943][fairseq.utils][INFO] - rank 0: capabilities = 8.6 ; total memory = 15.731 GB ; name = NVIDIA RTX A4000 [2024-07-06 00:44:15,943][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2024-07-06 00:44:15,943][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs) [2024-07-06 00:44:15,943][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1 [2024-07-06 00:44:15,943][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt [2024-07-06 00:44:15,944][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt [2024-07-06 00:44:15,944][fairseq.trainer][INFO] - loading train data for epoch 1 [2024-07-06 00:44:15,944][src.vsp_llm_training][INFO] - Using tokenizer [2024-07-06 00:44:17,593][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 1206778, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73 [2024-07-06 00:44:18,302][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/full/train.wrd is sequence label. skipped [2024-07-06 00:44:18,302][src.vsp_llm_dataset][INFO] - image transform: Compose( Normalize(mean=0.0, std=255.0) RandomCrop(size=(88, 88)) Normalize(mean=0.421, std=0.165) ) [2024-07-06 00:44:18,302][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True, [2024-07-06 00:44:18,302][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1 [2024-07-06 00:44:23,817][fairseq.trainer][INFO] - begin training epoch 1 [2024-07-06 00:44:23,817][fairseq_cli.train][INFO] - Start iterating over samples [2024-07-06 00:49:53,426][train_inner][INFO] - {"epoch": 1, "update": 0.001, "loss": "7.916", "ntokens": "125.715", "acc_total": "125.715", "n_correct": "16.195", "wer_total": "125.715", "n_error": "109.46", "ppl": "241.46", "accuracy": "12.882", "wer": "87.07", "wps": "76.5", "ups": "0.61", "wpb": "125.7", "bsz": "8", "num_updates": "200", "lr": "5.99e-06", "gnorm": "10.385", "loss_scale": "128", "train_wall": "329", "gb_free": "7.1", "wall": "337"} [2024-07-06 00:55:24,480][train_inner][INFO] - {"epoch": 1, "update": 0.003, "loss": "6.478", "ntokens": "127.15", "acc_total": "127.15", "n_correct": "23.765", "wer_total": "127.15", "n_error": "103.285", "ppl": "89.12", "accuracy": "18.691", "wer": "81.231", "wps": "76.8", "ups": "0.6", "wpb": "127.2", "bsz": "8", "num_updates": "400", "lr": "6.98e-06", "gnorm": "4.619", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "669"} [2024-07-06 01:00:54,985][train_inner][INFO] - {"epoch": 1, "update": 0.004, "loss": "6.238", "ntokens": "125.945", "acc_total": "125.945", "n_correct": "25.85", "wer_total": "125.945", "n_error": "99.935", "ppl": "75.46", "accuracy": "20.525", "wer": "79.348", "wps": "76.2", "ups": "0.61", "wpb": "125.9", "bsz": "8", "num_updates": "600", "lr": "7.97e-06", "gnorm": "3.666", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "999"} [2024-07-06 01:06:25,734][train_inner][INFO] - {"epoch": 1, "update": 0.005, "loss": "6.079", "ntokens": "125.875", "acc_total": "125.875", "n_correct": "27.74", "wer_total": "125.875", "n_error": "97.94", "ppl": "67.61", "accuracy": "22.038", "wer": "77.807", "wps": "76.1", "ups": "0.6", "wpb": "125.9", "bsz": "8", "num_updates": "800", "lr": "8.96e-06", "gnorm": "3.839", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "1330"} [2024-07-06 01:11:56,516][train_inner][INFO] - {"epoch": 1, "update": 0.007, "loss": "5.982", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "29.145", "wer_total": "127.09", "n_error": "97.66", "ppl": "63.21", "accuracy": "22.933", "wer": "76.843", "wps": "76.8", "ups": "0.6", "wpb": "127.1", "bsz": "8", "num_updates": "1000", "lr": "9.95e-06", "gnorm": "4.196", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "1661"} [2024-07-06 01:17:27,288][train_inner][INFO] - {"epoch": 1, "update": 0.008, "loss": "5.996", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "29.46", "wer_total": "126.785", "n_error": "97.005", "ppl": "63.8", "accuracy": "23.236", "wer": "76.511", "wps": "76.7", "ups": "0.6", "wpb": "126.8", "bsz": "8", "num_updates": "1200", "lr": "1.094e-05", "gnorm": "4.456", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "1991"} [2024-07-06 01:22:58,047][train_inner][INFO] - {"epoch": 1, "update": 0.009, "loss": "5.967", "ntokens": "127.635", "acc_total": "127.635", "n_correct": "30.075", "wer_total": "127.635", "n_error": "97.245", "ppl": "62.56", "accuracy": "23.563", "wer": "76.19", "wps": "77.2", "ups": "0.6", "wpb": "127.6", "bsz": "8", "num_updates": "1400", "lr": "1.193e-05", "gnorm": "4.581", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "2322"} [2024-07-06 01:28:28,823][train_inner][INFO] - {"epoch": 1, "update": 0.011, "loss": "5.857", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "30.545", "wer_total": "126.92", "n_error": "96.1", "ppl": "57.97", "accuracy": "24.066", "wer": "75.717", "wps": "76.7", "ups": "0.6", "wpb": "126.9", "bsz": "8", "num_updates": "1600", "lr": "1.292e-05", "gnorm": "4.671", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "2653"} [2024-07-06 01:33:59,714][train_inner][INFO] - {"epoch": 1, "update": 0.012, "loss": "5.828", "ntokens": "125.79", "acc_total": "125.79", "n_correct": "30.96", "wer_total": "125.79", "n_error": "94.55", "ppl": "56.8", "accuracy": "24.612", "wer": "75.165", "wps": "76", "ups": "0.6", "wpb": "125.8", "bsz": "8", "num_updates": "1800", "lr": "1.391e-05", "gnorm": "5.05", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "2984"} [2024-07-06 01:39:30,546][train_inner][INFO] - {"epoch": 1, "update": 0.013, "loss": "5.845", "ntokens": "128.27", "acc_total": "128.27", "n_correct": "31.21", "wer_total": "128.27", "n_error": "96.795", "ppl": "57.46", "accuracy": "24.331", "wer": "75.462", "wps": "77.5", "ups": "0.6", "wpb": "128.3", "bsz": "8", "num_updates": "2000", "lr": "1.49e-05", "gnorm": "5.065", "loss_scale": "128", "train_wall": "330", "gb_free": "7.1", "wall": "3315"} [2024-07-06 01:45:01,415][train_inner][INFO] - {"epoch": 1, "update": 0.015, "loss": "5.844", "ntokens": "127.43", "acc_total": "127.43", "n_correct": "30.925", "wer_total": "127.43", "n_error": "96.325", "ppl": "57.43", "accuracy": "24.268", "wer": "75.591", "wps": "77", "ups": "0.6", "wpb": "127.4", "bsz": "8", "num_updates": "2200", "lr": "1.589e-05", "gnorm": "4.905", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "3645"} [2024-07-06 01:50:32,205][train_inner][INFO] - {"epoch": 1, "update": 0.016, "loss": "5.769", "ntokens": "127.255", "acc_total": "127.255", "n_correct": "31.335", "wer_total": "127.255", "n_error": "95.675", "ppl": "54.53", "accuracy": "24.624", "wer": "75.184", "wps": "76.9", "ups": "0.6", "wpb": "127.3", "bsz": "8", "num_updates": "2400", "lr": "1.688e-05", "gnorm": "5.093", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "3976"} [2024-07-06 01:53:17,371][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 02:35:06,706][valid][INFO] - {"epoch": 1, "valid_loss": "5.688", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "4.594", "valid_wer_total": "18.1585", "valid_n_error": "13.5153", "valid_ppl": "51.55", "valid_accuracy": "25.299", "valid_wer": "74.43", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "2500"} [2024-07-06 02:35:06,706][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 2500 updates [2024-07-06 02:35:06,707][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_2500.pt [2024-07-06 02:35:09,789][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_2500.pt [2024-07-06 02:35:12,515][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_2500.pt (epoch 1 @ 2500 updates, score 25.299) (writing took 5.80829477799125 seconds) [2024-07-06 02:37:57,757][train_inner][INFO] - {"epoch": 1, "update": 0.017, "loss": "5.783", "ntokens": "126.425", "acc_total": "126.425", "n_correct": "31.82", "wer_total": "126.425", "n_error": "94.31", "ppl": "55.05", "accuracy": "25.169", "wer": "74.598", "wps": "8.9", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "2600", "lr": "1.787e-05", "gnorm": "4.91", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "6822"} [2024-07-06 02:43:29,243][train_inner][INFO] - {"epoch": 1, "update": 0.019, "loss": "5.751", "ntokens": "126.355", "acc_total": "126.355", "n_correct": "31.42", "wer_total": "126.355", "n_error": "94.66", "ppl": "53.87", "accuracy": "24.866", "wer": "74.916", "wps": "76.2", "ups": "0.6", "wpb": "126.4", "bsz": "8", "num_updates": "2800", "lr": "1.886e-05", "gnorm": "5.169", "loss_scale": "256", "train_wall": "331", "gb_free": "7.1", "wall": "7153"} [2024-07-06 02:49:00,168][train_inner][INFO] - {"epoch": 1, "update": 0.02, "loss": "5.709", "ntokens": "127.29", "acc_total": "127.29", "n_correct": "32.86", "wer_total": "127.29", "n_error": "94.16", "ppl": "52.3", "accuracy": "25.815", "wer": "73.973", "wps": "76.9", "ups": "0.6", "wpb": "127.3", "bsz": "8", "num_updates": "3000", "lr": "1.985e-05", "gnorm": "5.086", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "7484"} [2024-07-06 02:54:31,095][train_inner][INFO] - {"epoch": 1, "update": 0.021, "loss": "5.76", "ntokens": "125.455", "acc_total": "125.455", "n_correct": "31.56", "wer_total": "125.455", "n_error": "93.515", "ppl": "54.18", "accuracy": "25.156", "wer": "74.541", "wps": "75.8", "ups": "0.6", "wpb": "125.5", "bsz": "8", "num_updates": "3200", "lr": "2.084e-05", "gnorm": "5.198", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "7815"} [2024-07-06 03:00:01,861][train_inner][INFO] - {"epoch": 1, "update": 0.023, "loss": "5.69", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "32.945", "wer_total": "126.93", "n_error": "93.7", "ppl": "51.62", "accuracy": "25.955", "wer": "73.82", "wps": "76.7", "ups": "0.6", "wpb": "126.9", "bsz": "8", "num_updates": "3400", "lr": "2.183e-05", "gnorm": "5.128", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "8146"} [2024-07-06 03:05:32,612][train_inner][INFO] - {"epoch": 1, "update": 0.024, "loss": "5.652", "ntokens": "126.26", "acc_total": "126.26", "n_correct": "33.01", "wer_total": "126.26", "n_error": "93.035", "ppl": "50.29", "accuracy": "26.144", "wer": "73.685", "wps": "76.3", "ups": "0.6", "wpb": "126.3", "bsz": "8", "num_updates": "3600", "lr": "2.282e-05", "gnorm": "5.165", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "8477"} [2024-07-06 03:11:03,797][train_inner][INFO] - {"epoch": 1, "update": 0.025, "loss": "5.646", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "33.015", "wer_total": "127.19", "n_error": "93.905", "ppl": "50.06", "accuracy": "25.957", "wer": "73.83", "wps": "76.8", "ups": "0.6", "wpb": "127.2", "bsz": "8", "num_updates": "3800", "lr": "2.381e-05", "gnorm": "5.059", "loss_scale": "256", "train_wall": "331", "gb_free": "7.1", "wall": "8808"} [2024-07-06 03:16:34,498][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "5.639", "ntokens": "126.25", "acc_total": "126.25", "n_correct": "32.925", "wer_total": "126.25", "n_error": "93.105", "ppl": "49.83", "accuracy": "26.079", "wer": "73.747", "wps": "76.4", "ups": "0.6", "wpb": "126.2", "bsz": "8", "num_updates": "4000", "lr": "2.48e-05", "gnorm": "5.135", "loss_scale": "256", "train_wall": "330", "gb_free": "7.1", "wall": "9139"} [2024-07-06 03:22:05,084][train_inner][INFO] - {"epoch": 1, "update": 0.028, "loss": "5.654", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "33.665", "wer_total": "127.515", "n_error": "93.585", "ppl": "50.34", "accuracy": "26.401", "wer": "73.391", "wps": "77.1", "ups": "0.6", "wpb": "127.5", "bsz": "8", "num_updates": "4200", "lr": "2.579e-05", "gnorm": "5.059", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "9469"} [2024-07-06 03:27:35,931][train_inner][INFO] - {"epoch": 1, "update": 0.029, "loss": "5.597", "ntokens": "127.63", "acc_total": "127.63", "n_correct": "34.215", "wer_total": "127.63", "n_error": "93.16", "ppl": "48.4", "accuracy": "26.808", "wer": "72.992", "wps": "77.2", "ups": "0.6", "wpb": "127.6", "bsz": "8", "num_updates": "4400", "lr": "2.678e-05", "gnorm": "5.08", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "9800"} [2024-07-06 03:33:07,016][train_inner][INFO] - {"epoch": 1, "update": 0.03, "loss": "5.562", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "34.505", "wer_total": "126.78", "n_error": "92.09", "ppl": "47.25", "accuracy": "27.216", "wer": "72.638", "wps": "76.6", "ups": "0.6", "wpb": "126.8", "bsz": "8", "num_updates": "4600", "lr": "2.777e-05", "gnorm": "5.021", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "10131"} [2024-07-06 03:38:38,014][train_inner][INFO] - {"epoch": 1, "update": 0.032, "loss": "5.608", "ntokens": "127.27", "acc_total": "127.27", "n_correct": "34.565", "wer_total": "127.27", "n_error": "92.47", "ppl": "48.77", "accuracy": "27.159", "wer": "72.657", "wps": "76.9", "ups": "0.6", "wpb": "127.3", "bsz": "8", "num_updates": "4800", "lr": "2.876e-05", "gnorm": "5.03", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "10462"} [2024-07-06 03:44:08,979][train_inner][INFO] - {"epoch": 1, "update": 0.033, "loss": "5.555", "ntokens": "126.41", "acc_total": "126.41", "n_correct": "34.9", "wer_total": "126.41", "n_error": "91.215", "ppl": "47.01", "accuracy": "27.609", "wer": "72.158", "wps": "76.4", "ups": "0.6", "wpb": "126.4", "bsz": "8", "num_updates": "5000", "lr": "2.975e-05", "gnorm": "5.165", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "10793"} [2024-07-06 03:44:08,979][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 04:25:57,757][valid][INFO] - {"epoch": 1, "valid_loss": "5.439", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "5.10063", "valid_wer_total": "18.1585", "valid_n_error": "13.0262", "valid_ppl": "43.37", "valid_accuracy": "28.089", "valid_wer": "71.736", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "5000", "valid_best_accuracy": "28.089"} [2024-07-06 04:25:57,758][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 5000 updates [2024-07-06 04:25:57,758][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_5000.pt [2024-07-06 04:26:00,815][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_5000.pt [2024-07-06 04:26:04,909][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_5000.pt (epoch 1 @ 5000 updates, score 28.089) (writing took 7.151080717012519 seconds) [2024-07-06 04:31:35,522][train_inner][INFO] - {"epoch": 1, "update": 0.034, "loss": "5.469", "ntokens": "127.635", "acc_total": "127.635", "n_correct": "36.745", "wer_total": "127.635", "n_error": "90.605", "ppl": "44.3", "accuracy": "28.789", "wer": "70.988", "wps": "9", "ups": "0.07", "wpb": "127.6", "bsz": "8", "num_updates": "5200", "lr": "3.074e-05", "gnorm": "5.207", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "13640"} [2024-07-06 04:37:06,541][train_inner][INFO] - {"epoch": 1, "update": 0.036, "loss": "5.478", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "36.99", "wer_total": "126.21", "n_error": "88.98", "ppl": "44.57", "accuracy": "29.308", "wer": "70.502", "wps": "76.3", "ups": "0.6", "wpb": "126.2", "bsz": "8", "num_updates": "5400", "lr": "3.173e-05", "gnorm": "5.459", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "13971"} [2024-07-06 04:42:37,371][train_inner][INFO] - {"epoch": 1, "update": 0.037, "loss": "5.395", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "38.145", "wer_total": "126.31", "n_error": "87.88", "ppl": "42.07", "accuracy": "30.2", "wer": "69.575", "wps": "76.4", "ups": "0.6", "wpb": "126.3", "bsz": "8", "num_updates": "5600", "lr": "3.272e-05", "gnorm": "5.786", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "14301"} [2024-07-06 04:48:08,210][train_inner][INFO] - {"epoch": 1, "update": 0.038, "loss": "5.282", "ntokens": "126.205", "acc_total": "126.205", "n_correct": "39.795", "wer_total": "126.205", "n_error": "86.21", "ppl": "38.92", "accuracy": "31.532", "wer": "68.309", "wps": "76.3", "ups": "0.6", "wpb": "126.2", "bsz": "8", "num_updates": "5800", "lr": "3.371e-05", "gnorm": "5.858", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "14632"} [2024-07-06 04:53:38,733][train_inner][INFO] - {"epoch": 1, "update": 0.04, "loss": "5.241", "ntokens": "125.825", "acc_total": "125.825", "n_correct": "39.9", "wer_total": "125.825", "n_error": "85.695", "ppl": "37.81", "accuracy": "31.711", "wer": "68.106", "wps": "76.1", "ups": "0.61", "wpb": "125.8", "bsz": "8", "num_updates": "6000", "lr": "3.47e-05", "gnorm": "6.174", "loss_scale": "512", "train_wall": "330", "gb_free": "7.1", "wall": "14963"} [2024-07-06 04:59:09,871][train_inner][INFO] - {"epoch": 1, "update": 0.041, "loss": "5.192", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "41.88", "wer_total": "127.135", "n_error": "84.95", "ppl": "36.56", "accuracy": "32.941", "wer": "66.819", "wps": "76.8", "ups": "0.6", "wpb": "127.1", "bsz": "8", "num_updates": "6200", "lr": "3.569e-05", "gnorm": "6.543", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "15294"} [2024-07-06 05:04:40,721][train_inner][INFO] - {"epoch": 1, "update": 0.042, "loss": "5.079", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "43.445", "wer_total": "126.58", "n_error": "82.84", "ppl": "33.79", "accuracy": "34.322", "wer": "65.445", "wps": "76.5", "ups": "0.6", "wpb": "126.6", "bsz": "8", "num_updates": "6400", "lr": "3.668e-05", "gnorm": "6.956", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "15625"} [2024-07-06 05:10:11,673][train_inner][INFO] - {"epoch": 1, "update": 0.044, "loss": "5.005", "ntokens": "127.435", "acc_total": "127.435", "n_correct": "44.85", "wer_total": "127.435", "n_error": "82.33", "ppl": "32.1", "accuracy": "35.194", "wer": "64.605", "wps": "77", "ups": "0.6", "wpb": "127.4", "bsz": "8", "num_updates": "6600", "lr": "3.767e-05", "gnorm": "7.239", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "15956"} [2024-07-06 05:15:42,378][train_inner][INFO] - {"epoch": 1, "update": 0.045, "loss": "4.84", "ntokens": "126.375", "acc_total": "126.375", "n_correct": "46.99", "wer_total": "126.375", "n_error": "79.165", "ppl": "28.64", "accuracy": "37.183", "wer": "62.643", "wps": "76.4", "ups": "0.6", "wpb": "126.4", "bsz": "8", "num_updates": "6800", "lr": "3.866e-05", "gnorm": "7.355", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "16286"} [2024-07-06 05:21:13,524][train_inner][INFO] - {"epoch": 1, "update": 0.046, "loss": "4.697", "ntokens": "126.99", "acc_total": "126.99", "n_correct": "49.4", "wer_total": "126.99", "n_error": "77.315", "ppl": "25.93", "accuracy": "38.901", "wer": "60.883", "wps": "76.7", "ups": "0.6", "wpb": "127", "bsz": "8", "num_updates": "7000", "lr": "3.965e-05", "gnorm": "7.817", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "16618"} [2024-07-06 05:26:44,501][train_inner][INFO] - {"epoch": 1, "update": 0.048, "loss": "4.673", "ntokens": "128.31", "acc_total": "128.31", "n_correct": "50.13", "wer_total": "128.31", "n_error": "77.975", "ppl": "25.51", "accuracy": "39.069", "wer": "60.771", "wps": "77.5", "ups": "0.6", "wpb": "128.3", "bsz": "8", "num_updates": "7200", "lr": "4.064e-05", "gnorm": "8.058", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "16949"} [2024-07-06 05:32:15,784][train_inner][INFO] - {"epoch": 1, "update": 0.049, "loss": "4.556", "ntokens": "127.815", "acc_total": "127.815", "n_correct": "52.035", "wer_total": "127.815", "n_error": "75.545", "ppl": "23.53", "accuracy": "40.711", "wer": "59.105", "wps": "77.2", "ups": "0.6", "wpb": "127.8", "bsz": "8", "num_updates": "7400", "lr": "4.163e-05", "gnorm": "8.289", "loss_scale": "1024", "train_wall": "331", "gb_free": "7.1", "wall": "17280"} [2024-07-06 05:35:01,141][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 06:16:50,946][valid][INFO] - {"epoch": 1, "valid_loss": "nan", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "8.03602", "valid_wer_total": "18.1585", "valid_n_error": "10.1055", "valid_ppl": "nan", "valid_accuracy": "44.255", "valid_wer": "55.652", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "7500", "valid_best_accuracy": "44.255"} [2024-07-06 06:16:50,947][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 7500 updates [2024-07-06 06:16:50,947][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_7500.pt [2024-07-06 06:16:54,086][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_7500.pt [2024-07-06 06:16:58,518][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_7500.pt (epoch 1 @ 7500 updates, score 44.255) (writing took 7.571079046989325 seconds) [2024-07-06 06:19:43,783][train_inner][INFO] - {"epoch": 1, "update": 0.05, "loss": "4.502", "ntokens": "126.495", "acc_total": "126.495", "n_correct": "52.145", "wer_total": "126.495", "n_error": "74.185", "ppl": "22.66", "accuracy": "41.223", "wer": "58.647", "wps": "8.9", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "7600", "lr": "4.262e-05", "gnorm": "8.358", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "20128"} [2024-07-06 06:25:14,702][train_inner][INFO] - {"epoch": 1, "update": 0.052, "loss": "4.387", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "54.05", "wer_total": "126.795", "n_error": "72.545", "ppl": "20.93", "accuracy": "42.628", "wer": "57.214", "wps": "76.6", "ups": "0.6", "wpb": "126.8", "bsz": "8", "num_updates": "7800", "lr": "4.361e-05", "gnorm": "8.551", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "20459"} [2024-07-06 06:30:45,857][train_inner][INFO] - {"epoch": 1, "update": 0.053, "loss": "4.359", "ntokens": "126.44", "acc_total": "126.44", "n_correct": "53.975", "wer_total": "126.44", "n_error": "72.25", "ppl": "20.52", "accuracy": "42.688", "wer": "57.142", "wps": "76.4", "ups": "0.6", "wpb": "126.4", "bsz": "8", "num_updates": "8000", "lr": "4.46e-05", "gnorm": "8.603", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "20790"} [2024-07-06 06:36:16,783][train_inner][INFO] - {"epoch": 1, "update": 0.054, "loss": "4.255", "ntokens": "127.455", "acc_total": "127.455", "n_correct": "55.98", "wer_total": "127.455", "n_error": "71.29", "ppl": "19.09", "accuracy": "43.921", "wer": "55.933", "wps": "77", "ups": "0.6", "wpb": "127.5", "bsz": "8", "num_updates": "8200", "lr": "4.559e-05", "gnorm": "8.744", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "21121"} [2024-07-06 06:41:47,787][train_inner][INFO] - {"epoch": 1, "update": 0.056, "loss": "4.267", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "55.315", "wer_total": "126.835", "n_error": "71.35", "ppl": "19.25", "accuracy": "43.612", "wer": "56.254", "wps": "76.6", "ups": "0.6", "wpb": "126.8", "bsz": "8", "num_updates": "8400", "lr": "4.658e-05", "gnorm": "8.857", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "21452"} [2024-07-06 06:47:18,997][train_inner][INFO] - {"epoch": 1, "update": 0.057, "loss": "4.072", "ntokens": "127.67", "acc_total": "127.67", "n_correct": "58.03", "wer_total": "127.67", "n_error": "69.46", "ppl": "16.82", "accuracy": "45.453", "wer": "54.406", "wps": "77.1", "ups": "0.6", "wpb": "127.7", "bsz": "8", "num_updates": "8600", "lr": "4.757e-05", "gnorm": "8.832", "loss_scale": "2048", "train_wall": "331", "gb_free": "7.1", "wall": "21783"} [2024-07-06 06:52:50,001][train_inner][INFO] - {"epoch": 1, "update": 0.058, "loss": "4.018", "ntokens": "127.93", "acc_total": "127.93", "n_correct": "58.895", "wer_total": "127.93", "n_error": "68.89", "ppl": "16.2", "accuracy": "46.037", "wer": "53.85", "wps": "77.3", "ups": "0.6", "wpb": "127.9", "bsz": "8", "num_updates": "8800", "lr": "4.856e-05", "gnorm": "9.109", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "22114"} [2024-07-06 06:58:20,981][train_inner][INFO] - {"epoch": 1, "update": 0.06, "loss": "3.979", "ntokens": "127.805", "acc_total": "127.805", "n_correct": "59.615", "wer_total": "127.805", "n_error": "67.98", "ppl": "15.77", "accuracy": "46.645", "wer": "53.19", "wps": "77.2", "ups": "0.6", "wpb": "127.8", "bsz": "8", "num_updates": "9000", "lr": "4.955e-05", "gnorm": "9.084", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "22445"} [2024-07-06 07:03:51,986][train_inner][INFO] - {"epoch": 1, "update": 0.061, "loss": "3.891", "ntokens": "126.95", "acc_total": "126.95", "n_correct": "60.58", "wer_total": "126.95", "n_error": "66.285", "ppl": "14.83", "accuracy": "47.72", "wer": "52.213", "wps": "76.7", "ups": "0.6", "wpb": "127", "bsz": "8", "num_updates": "9200", "lr": "5.054e-05", "gnorm": "9.254", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "22776"} [2024-07-06 07:09:22,725][train_inner][INFO] - {"epoch": 1, "update": 0.062, "loss": "3.817", "ntokens": "127.38", "acc_total": "127.38", "n_correct": "61.175", "wer_total": "127.38", "n_error": "66.05", "ppl": "14.1", "accuracy": "48.026", "wer": "51.853", "wps": "77", "ups": "0.6", "wpb": "127.4", "bsz": "8", "num_updates": "9400", "lr": "5.153e-05", "gnorm": "9.397", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "23107"} [2024-07-06 07:14:53,505][train_inner][INFO] - {"epoch": 1, "update": 0.064, "loss": "3.711", "ntokens": "126.42", "acc_total": "126.42", "n_correct": "62.035", "wer_total": "126.42", "n_error": "64.29", "ppl": "13.1", "accuracy": "49.071", "wer": "50.854", "wps": "76.4", "ups": "0.6", "wpb": "126.4", "bsz": "8", "num_updates": "9600", "lr": "5.252e-05", "gnorm": "9.424", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "23438"} [2024-07-06 07:20:24,927][train_inner][INFO] - {"epoch": 1, "update": 0.065, "loss": "3.71", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "62.55", "wer_total": "126.975", "n_error": "64.325", "ppl": "13.09", "accuracy": "49.262", "wer": "50.66", "wps": "76.6", "ups": "0.6", "wpb": "127", "bsz": "8", "num_updates": "9800", "lr": "5.351e-05", "gnorm": "9.234", "loss_scale": "2048", "train_wall": "331", "gb_free": "7.1", "wall": "23769"} [2024-07-06 07:25:55,764][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "3.561", "ntokens": "126.615", "acc_total": "126.615", "n_correct": "64.27", "wer_total": "126.615", "n_error": "62.26", "ppl": "11.8", "accuracy": "50.76", "wer": "49.173", "wps": "76.5", "ups": "0.6", "wpb": "126.6", "bsz": "8", "num_updates": "10000", "lr": "5.45e-05", "gnorm": "9.312", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "24100"} [2024-07-06 07:25:55,765][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 08:08:05,206][valid][INFO] - {"epoch": 1, "valid_loss": "3.303", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "9.77541", "valid_wer_total": "18.1585", "valid_n_error": "8.37086", "valid_ppl": "9.87", "valid_accuracy": "53.834", "valid_wer": "46.099", "valid_wps": "172.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "10000", "valid_best_accuracy": "53.834"} [2024-07-06 08:08:05,207][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 10000 updates [2024-07-06 08:08:05,207][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_10000.pt [2024-07-06 08:08:08,300][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_10000.pt [2024-07-06 08:08:12,556][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_10000.pt (epoch 1 @ 10000 updates, score 53.834) (writing took 7.348568329995032 seconds) [2024-07-06 08:14:19,667][train_inner][INFO] - {"epoch": 1, "update": 0.068, "loss": "3.481", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "65.6", "wer_total": "126.86", "n_error": "61.15", "ppl": "11.17", "accuracy": "51.711", "wer": "48.203", "wps": "8.7", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "10200", "lr": "5.549e-05", "gnorm": "12.915", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "27004"} [2024-07-06 08:19:33,156][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-06 08:20:28,227][train_inner][INFO] - {"epoch": 1, "update": 0.069, "loss": "3.356", "ntokens": "127.84", "acc_total": "127.84", "n_correct": "68.37", "wer_total": "127.84", "n_error": "59.37", "ppl": "10.24", "accuracy": "53.481", "wer": "46.441", "wps": "69.4", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "10400", "lr": "5.648e-05", "gnorm": "12.592", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "27372"} [2024-07-06 08:26:34,879][train_inner][INFO] - {"epoch": 1, "update": 0.07, "loss": "3.253", "ntokens": "127.84", "acc_total": "127.84", "n_correct": "69.435", "wer_total": "127.84", "n_error": "58.275", "ppl": "9.53", "accuracy": "54.314", "wer": "45.584", "wps": "69.7", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "10600", "lr": "5.747e-05", "gnorm": "13.087", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "27739"} [2024-07-06 08:32:41,988][train_inner][INFO] - {"epoch": 1, "update": 0.072, "loss": "3.104", "ntokens": "126.645", "acc_total": "126.645", "n_correct": "71.235", "wer_total": "126.645", "n_error": "55.295", "ppl": "8.6", "accuracy": "56.248", "wer": "43.661", "wps": "69", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "10800", "lr": "5.846e-05", "gnorm": "12.338", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "28106"} [2024-07-06 08:38:48,736][train_inner][INFO] - {"epoch": 1, "update": 0.073, "loss": "3.046", "ntokens": "127.14", "acc_total": "127.14", "n_correct": "72.28", "wer_total": "127.14", "n_error": "54.73", "ppl": "8.26", "accuracy": "56.851", "wer": "43.047", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "11000", "lr": "5.945e-05", "gnorm": "12.951", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "28473"} [2024-07-06 08:44:55,492][train_inner][INFO] - {"epoch": 1, "update": 0.074, "loss": "2.969", "ntokens": "126.69", "acc_total": "126.69", "n_correct": "72.74", "wer_total": "126.69", "n_error": "53.835", "ppl": "7.83", "accuracy": "57.416", "wer": "42.493", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "11200", "lr": "6.044e-05", "gnorm": "12.828", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "28840"} [2024-07-06 08:51:02,527][train_inner][INFO] - {"epoch": 1, "update": 0.076, "loss": "2.877", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "74.925", "wer_total": "126.92", "n_error": "51.9", "ppl": "7.35", "accuracy": "59.033", "wer": "40.892", "wps": "69.2", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "11400", "lr": "6.143e-05", "gnorm": "12.334", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "29207"} [2024-07-06 08:57:09,274][train_inner][INFO] - {"epoch": 1, "update": 0.077, "loss": "2.806", "ntokens": "126.39", "acc_total": "126.39", "n_correct": "75.6", "wer_total": "126.39", "n_error": "50.675", "ppl": "6.99", "accuracy": "59.815", "wer": "40.094", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "11600", "lr": "6.242e-05", "gnorm": "12.367", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "29573"} [2024-07-06 09:02:13,597][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-06 09:03:17,586][train_inner][INFO] - {"epoch": 1, "update": 0.078, "loss": "2.723", "ntokens": "126.72", "acc_total": "126.72", "n_correct": "76.54", "wer_total": "126.72", "n_error": "50.105", "ppl": "6.6", "accuracy": "60.401", "wer": "39.54", "wps": "68.8", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "11800", "lr": "6.341e-05", "gnorm": "12.479", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "29942"} [2024-07-06 09:09:24,555][train_inner][INFO] - {"epoch": 1, "update": 0.08, "loss": "2.637", "ntokens": "127.67", "acc_total": "127.67", "n_correct": "78.46", "wer_total": "127.67", "n_error": "49.145", "ppl": "6.22", "accuracy": "61.455", "wer": "38.494", "wps": "69.6", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "12000", "lr": "6.44e-05", "gnorm": "12.359", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "30309"} [2024-07-06 09:15:31,421][train_inner][INFO] - {"epoch": 1, "update": 0.081, "loss": "2.627", "ntokens": "126.695", "acc_total": "126.695", "n_correct": "78.245", "wer_total": "126.695", "n_error": "48.335", "ppl": "6.18", "accuracy": "61.759", "wer": "38.151", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "12200", "lr": "6.539e-05", "gnorm": "12.426", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "30675"} [2024-07-06 09:21:38,404][train_inner][INFO] - {"epoch": 1, "update": 0.082, "loss": "2.584", "ntokens": "127.035", "acc_total": "127.035", "n_correct": "79.14", "wer_total": "127.035", "n_error": "47.825", "ppl": "6", "accuracy": "62.298", "wer": "37.647", "wps": "69.2", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "12400", "lr": "6.638e-05", "gnorm": "12.516", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "31042"} [2024-07-06 09:24:41,797][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 10:06:36,670][valid][INFO] - {"epoch": 1, "valid_loss": "2.234", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "12.0675", "valid_wer_total": "18.1585", "valid_n_error": "6.08212", "valid_ppl": "4.7", "valid_accuracy": "66.457", "valid_wer": "33.495", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "12500", "valid_best_accuracy": "66.457"} [2024-07-06 10:06:36,670][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12500 updates [2024-07-06 10:06:36,671][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12500.pt [2024-07-06 10:06:39,821][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12500.pt [2024-07-06 10:06:44,048][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12500.pt (epoch 1 @ 12500 updates, score 66.457) (writing took 7.3773015209590085 seconds) [2024-07-06 10:09:47,056][train_inner][INFO] - {"epoch": 1, "update": 0.084, "loss": "2.527", "ntokens": "126.195", "acc_total": "126.195", "n_correct": "78.815", "wer_total": "126.195", "n_error": "47.28", "ppl": "5.77", "accuracy": "62.455", "wer": "37.466", "wps": "8.7", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "12600", "lr": "6.737e-05", "gnorm": "12.538", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "33931"} [2024-07-06 10:15:54,168][train_inner][INFO] - {"epoch": 1, "update": 0.085, "loss": "2.436", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "80.995", "wer_total": "127.19", "n_error": "46.14", "ppl": "5.41", "accuracy": "63.68", "wer": "36.276", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "12800", "lr": "6.836e-05", "gnorm": "12.303", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "34298"} [2024-07-06 10:22:01,065][train_inner][INFO] - {"epoch": 1, "update": 0.086, "loss": "2.388", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "81.36", "wer_total": "126.56", "n_error": "45.11", "ppl": "5.23", "accuracy": "64.286", "wer": "35.643", "wps": "69", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "13000", "lr": "6.935e-05", "gnorm": "12.225", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "34665"} [2024-07-06 10:28:08,215][train_inner][INFO] - {"epoch": 1, "update": 0.088, "loss": "2.398", "ntokens": "127.02", "acc_total": "127.02", "n_correct": "81.435", "wer_total": "127.02", "n_error": "45.505", "ppl": "5.27", "accuracy": "64.112", "wer": "35.825", "wps": "69.2", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "13200", "lr": "7.034e-05", "gnorm": "12.277", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "35032"} [2024-07-06 10:34:16,289][train_inner][INFO] - {"epoch": 1, "update": 0.089, "loss": "2.324", "ntokens": "127.305", "acc_total": "127.305", "n_correct": "82.445", "wer_total": "127.305", "n_error": "44.83", "ppl": "5.01", "accuracy": "64.762", "wer": "35.215", "wps": "69.2", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "13400", "lr": "7.133e-05", "gnorm": "11.657", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "35400"} [2024-07-06 10:40:22,996][train_inner][INFO] - {"epoch": 1, "update": 0.09, "loss": "2.304", "ntokens": "126", "acc_total": "126", "n_correct": "82.055", "wer_total": "126", "n_error": "43.865", "ppl": "4.94", "accuracy": "65.123", "wer": "34.813", "wps": "68.7", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "13600", "lr": "7.232e-05", "gnorm": "11.859", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "35767"} [2024-07-06 10:46:29,651][train_inner][INFO] - {"epoch": 1, "update": 0.091, "loss": "2.169", "ntokens": "126.165", "acc_total": "126.165", "n_correct": "83.915", "wer_total": "126.165", "n_error": "42.16", "ppl": "4.5", "accuracy": "66.512", "wer": "33.417", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "13800", "lr": "7.331e-05", "gnorm": "11.784", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "36134"} [2024-07-06 10:52:36,468][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "2.268", "ntokens": "126.685", "acc_total": "126.685", "n_correct": "83.265", "wer_total": "126.685", "n_error": "43.355", "ppl": "4.82", "accuracy": "65.726", "wer": "34.223", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "14000", "lr": "7.43e-05", "gnorm": "12.013", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "36501"} [2024-07-06 10:58:43,120][train_inner][INFO] - {"epoch": 1, "update": 0.094, "loss": "2.249", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "83.17", "wer_total": "126.325", "n_error": "43.095", "ppl": "4.75", "accuracy": "65.838", "wer": "34.114", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "14200", "lr": "7.529e-05", "gnorm": "12.091", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "36867"} [2024-07-06 11:04:50,045][train_inner][INFO] - {"epoch": 1, "update": 0.095, "loss": "2.197", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "84.555", "wer_total": "127.185", "n_error": "42.565", "ppl": "4.58", "accuracy": "66.482", "wer": "33.467", "wps": "69.3", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "14400", "lr": "7.628e-05", "gnorm": "11.945", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "37234"} [2024-07-06 11:10:57,311][train_inner][INFO] - {"epoch": 1, "update": 0.097, "loss": "2.082", "ntokens": "127.34", "acc_total": "127.34", "n_correct": "85.825", "wer_total": "127.34", "n_error": "41.425", "ppl": "4.23", "accuracy": "67.398", "wer": "32.531", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "14600", "lr": "7.727e-05", "gnorm": "11.482", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "37601"} [2024-07-06 11:13:05,730][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-06 11:17:06,196][train_inner][INFO] - {"epoch": 1, "update": 0.098, "loss": "2.138", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "85.06", "wer_total": "126.915", "n_error": "41.785", "ppl": "4.4", "accuracy": "67.021", "wer": "32.924", "wps": "68.8", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "14800", "lr": "7.826e-05", "gnorm": "11.686", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "37970"} [2024-07-06 11:23:13,636][train_inner][INFO] - {"epoch": 1, "update": 0.099, "loss": "2.131", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "84.96", "wer_total": "126.295", "n_error": "41.26", "ppl": "4.38", "accuracy": "67.271", "wer": "32.67", "wps": "68.7", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "15000", "lr": "7.925e-05", "gnorm": "11.217", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "38338"} [2024-07-06 11:23:13,636][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 12:05:07,724][valid][INFO] - {"epoch": 1, "valid_loss": "1.79", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "12.9712", "valid_wer_total": "18.1585", "valid_n_error": "5.1827", "valid_ppl": "3.46", "valid_accuracy": "71.433", "valid_wer": "28.541", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "71.433"} [2024-07-06 12:05:07,725][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15000 updates [2024-07-06 12:05:07,725][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_15000.pt [2024-07-06 12:05:10,859][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_15000.pt [2024-07-06 12:05:15,174][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_15000.pt (epoch 1 @ 15000 updates, score 71.433) (writing took 7.449446495971642 seconds) [2024-07-06 12:11:22,121][train_inner][INFO] - {"epoch": 1, "update": 0.101, "loss": "2.044", "ntokens": "126.44", "acc_total": "126.44", "n_correct": "86.43", "wer_total": "126.44", "n_error": "39.935", "ppl": "4.12", "accuracy": "68.357", "wer": "31.584", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "15200", "lr": "8.024e-05", "gnorm": "10.748", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "41226"} [2024-07-06 12:17:29,712][train_inner][INFO] - {"epoch": 1, "update": 0.102, "loss": "2.046", "ntokens": "127.12", "acc_total": "127.12", "n_correct": "86.445", "wer_total": "127.12", "n_error": "40.595", "ppl": "4.13", "accuracy": "68.003", "wer": "31.934", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "15400", "lr": "8.123e-05", "gnorm": "11.301", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "41594"} [2024-07-06 12:23:36,775][train_inner][INFO] - {"epoch": 1, "update": 0.103, "loss": "2.032", "ntokens": "127.505", "acc_total": "127.505", "n_correct": "86.64", "wer_total": "127.505", "n_error": "40.815", "ppl": "4.09", "accuracy": "67.95", "wer": "32.011", "wps": "69.5", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "15600", "lr": "8.222e-05", "gnorm": "11.225", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "41961"} [2024-07-06 12:29:43,774][train_inner][INFO] - {"epoch": 1, "update": 0.105, "loss": "1.953", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "88.145", "wer_total": "127.64", "n_error": "39.425", "ppl": "3.87", "accuracy": "69.058", "wer": "30.888", "wps": "69.6", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "15800", "lr": "8.321e-05", "gnorm": "10.685", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "42328"} [2024-07-06 12:35:50,932][train_inner][INFO] - {"epoch": 1, "update": 0.106, "loss": "2.004", "ntokens": "126.335", "acc_total": "126.335", "n_correct": "86.655", "wer_total": "126.335", "n_error": "39.625", "ppl": "4.01", "accuracy": "68.591", "wer": "31.365", "wps": "68.8", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "16000", "lr": "8.42e-05", "gnorm": "11.322", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "42695"} [2024-07-06 12:41:58,522][train_inner][INFO] - {"epoch": 1, "update": 0.107, "loss": "1.886", "ntokens": "126.105", "acc_total": "126.105", "n_correct": "87.505", "wer_total": "126.105", "n_error": "38.55", "ppl": "3.7", "accuracy": "69.391", "wer": "30.57", "wps": "68.6", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "16200", "lr": "8.519e-05", "gnorm": "11.118", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "43063"} [2024-07-06 12:48:06,505][train_inner][INFO] - {"epoch": 1, "update": 0.109, "loss": "1.906", "ntokens": "126.34", "acc_total": "126.34", "n_correct": "88.205", "wer_total": "126.34", "n_error": "38.1", "ppl": "3.75", "accuracy": "69.816", "wer": "30.157", "wps": "68.7", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "16400", "lr": "8.618e-05", "gnorm": "10.682", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "43431"} [2024-07-06 12:54:14,350][train_inner][INFO] - {"epoch": 1, "update": 0.11, "loss": "1.923", "ntokens": "127.355", "acc_total": "127.355", "n_correct": "88.665", "wer_total": "127.355", "n_error": "38.625", "ppl": "3.79", "accuracy": "69.62", "wer": "30.329", "wps": "69.2", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "16600", "lr": "8.717e-05", "gnorm": "10.82", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "43798"} [2024-07-06 13:00:22,474][train_inner][INFO] - {"epoch": 1, "update": 0.111, "loss": "1.869", "ntokens": "128.04", "acc_total": "128.04", "n_correct": "89.3", "wer_total": "128.04", "n_error": "38.66", "ppl": "3.65", "accuracy": "69.744", "wer": "30.194", "wps": "69.6", "ups": "0.54", "wpb": "128", "bsz": "8", "num_updates": "16800", "lr": "8.816e-05", "gnorm": "10.863", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "44167"} [2024-07-06 13:06:30,445][train_inner][INFO] - {"epoch": 1, "update": 0.113, "loss": "1.93", "ntokens": "127.105", "acc_total": "127.105", "n_correct": "88.025", "wer_total": "127.105", "n_error": "38.99", "ppl": "3.81", "accuracy": "69.254", "wer": "30.675", "wps": "69.1", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "17000", "lr": "8.915e-05", "gnorm": "10.682", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "44535"} [2024-07-06 13:07:23,658][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-06 13:12:39,915][train_inner][INFO] - {"epoch": 1, "update": 0.114, "loss": "1.859", "ntokens": "125.955", "acc_total": "125.955", "n_correct": "88.11", "wer_total": "125.955", "n_error": "37.775", "ppl": "3.63", "accuracy": "69.954", "wer": "29.991", "wps": "68.2", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "17200", "lr": "9.014e-05", "gnorm": "10.297", "loss_scale": "1024", "train_wall": "369", "gb_free": "6.5", "wall": "44904"} [2024-07-06 13:18:47,840][train_inner][INFO] - {"epoch": 1, "update": 0.115, "loss": "1.819", "ntokens": "127.745", "acc_total": "127.745", "n_correct": "90.535", "wer_total": "127.745", "n_error": "37.155", "ppl": "3.53", "accuracy": "70.872", "wer": "29.085", "wps": "69.4", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "17400", "lr": "9.113e-05", "gnorm": "10.467", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "45272"} [2024-07-06 13:21:51,588][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 14:03:50,521][valid][INFO] - {"epoch": 1, "valid_loss": "1.6", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.3436", "valid_wer_total": "18.1585", "valid_n_error": "4.81021", "valid_ppl": "3.03", "valid_accuracy": "73.484", "valid_wer": "26.49", "valid_wps": "172.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "17500", "valid_best_accuracy": "73.484"} [2024-07-06 14:03:50,522][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 17500 updates [2024-07-06 14:03:50,522][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_17500.pt [2024-07-06 14:03:53,687][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_17500.pt [2024-07-06 14:03:57,746][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_17500.pt (epoch 1 @ 17500 updates, score 73.484) (writing took 7.2238709179800935 seconds) [2024-07-06 14:07:02,065][train_inner][INFO] - {"epoch": 1, "update": 0.117, "loss": "1.868", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "88.945", "wer_total": "126.53", "n_error": "37.535", "ppl": "3.65", "accuracy": "70.296", "wer": "29.665", "wps": "8.7", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "17600", "lr": "9.212e-05", "gnorm": "10.835", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "48166"} [2024-07-06 14:13:12,115][train_inner][INFO] - {"epoch": 1, "update": 0.118, "loss": "1.81", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "89.47", "wer_total": "126.77", "n_error": "37.24", "ppl": "3.51", "accuracy": "70.577", "wer": "29.376", "wps": "68.5", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "17800", "lr": "9.311e-05", "gnorm": "10.72", "loss_scale": "1024", "train_wall": "369", "gb_free": "6.5", "wall": "48536"} [2024-07-06 14:19:22,245][train_inner][INFO] - {"epoch": 1, "update": 0.119, "loss": "1.798", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "89.73", "wer_total": "126.935", "n_error": "37.145", "ppl": "3.48", "accuracy": "70.69", "wer": "29.263", "wps": "68.6", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "18000", "lr": "9.41e-05", "gnorm": "10.242", "loss_scale": "1024", "train_wall": "369", "gb_free": "6.5", "wall": "48906"} [2024-07-06 14:25:31,985][train_inner][INFO] - {"epoch": 1, "update": 0.121, "loss": "1.814", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "89.88", "wer_total": "127.03", "n_error": "37.1", "ppl": "3.52", "accuracy": "70.755", "wer": "29.206", "wps": "68.7", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "18200", "lr": "9.509e-05", "gnorm": "10.085", "loss_scale": "1024", "train_wall": "369", "gb_free": "6.5", "wall": "49276"} [2024-07-06 14:31:40,078][train_inner][INFO] - {"epoch": 1, "update": 0.122, "loss": "1.774", "ntokens": "127", "acc_total": "127", "n_correct": "90.42", "wer_total": "127", "n_error": "36.505", "ppl": "3.42", "accuracy": "71.197", "wer": "28.744", "wps": "69", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "18400", "lr": "9.608e-05", "gnorm": "9.997", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "49644"} [2024-07-06 14:37:48,385][train_inner][INFO] - {"epoch": 1, "update": 0.123, "loss": "1.777", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "90.61", "wer_total": "127.135", "n_error": "36.455", "ppl": "3.43", "accuracy": "71.271", "wer": "28.674", "wps": "69", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "18600", "lr": "9.707e-05", "gnorm": "10.034", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "50012"} [2024-07-06 14:43:56,533][train_inner][INFO] - {"epoch": 1, "update": 0.125, "loss": "1.794", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "90.4", "wer_total": "127.42", "n_error": "36.98", "ppl": "3.47", "accuracy": "70.946", "wer": "29.022", "wps": "69.2", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "18800", "lr": "9.806e-05", "gnorm": "9.624", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "50381"} [2024-07-06 14:50:04,632][train_inner][INFO] - {"epoch": 1, "update": 0.126, "loss": "1.748", "ntokens": "127.25", "acc_total": "127.25", "n_correct": "90.67", "wer_total": "127.25", "n_error": "36.555", "ppl": "3.36", "accuracy": "71.253", "wer": "28.727", "wps": "69.1", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "19000", "lr": "9.905e-05", "gnorm": "10.063", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "50749"} [2024-07-06 14:56:12,560][train_inner][INFO] - {"epoch": 1, "update": 0.127, "loss": "1.742", "ntokens": "126.075", "acc_total": "126.075", "n_correct": "89.76", "wer_total": "126.075", "n_error": "36.24", "ppl": "3.34", "accuracy": "71.196", "wer": "28.745", "wps": "68.5", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "19200", "lr": "0.00010004", "gnorm": "9.95", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "51117"} [2024-07-06 15:02:20,573][train_inner][INFO] - {"epoch": 1, "update": 0.129, "loss": "1.715", "ntokens": "127.22", "acc_total": "127.22", "n_correct": "91.115", "wer_total": "127.22", "n_error": "36.055", "ppl": "3.28", "accuracy": "71.62", "wer": "28.341", "wps": "69.1", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "19400", "lr": "0.00010103", "gnorm": "9.377", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "51485"} [2024-07-06 15:08:28,557][train_inner][INFO] - {"epoch": 1, "update": 0.13, "loss": "1.74", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "91", "wer_total": "126.935", "n_error": "35.89", "ppl": "3.34", "accuracy": "71.69", "wer": "28.274", "wps": "69", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "19600", "lr": "0.00010202", "gnorm": "9.793", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "51853"} [2024-07-06 15:14:36,349][train_inner][INFO] - {"epoch": 1, "update": 0.131, "loss": "1.685", "ntokens": "125.605", "acc_total": "125.605", "n_correct": "90.45", "wer_total": "125.605", "n_error": "35.13", "ppl": "3.21", "accuracy": "72.011", "wer": "27.969", "wps": "68.3", "ups": "0.54", "wpb": "125.6", "bsz": "8", "num_updates": "19800", "lr": "0.00010301", "gnorm": "9.645", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "52220"} [2024-07-06 15:20:44,256][train_inner][INFO] - {"epoch": 1, "update": 0.133, "loss": "1.669", "ntokens": "126.39", "acc_total": "126.39", "n_correct": "91.175", "wer_total": "126.39", "n_error": "35.165", "ppl": "3.18", "accuracy": "72.138", "wer": "27.823", "wps": "68.7", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "20000", "lr": "0.000104", "gnorm": "9.616", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "52588"} [2024-07-06 15:20:44,257][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 16:02:43,552][valid][INFO] - {"epoch": 1, "valid_loss": "1.425", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.7193", "valid_wer_total": "18.1585", "valid_n_error": "4.43389", "valid_ppl": "2.69", "valid_accuracy": "75.553", "valid_wer": "24.418", "valid_wps": "172.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "20000", "valid_best_accuracy": "75.553"} [2024-07-06 16:02:43,553][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 20000 updates [2024-07-06 16:02:43,553][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_20000.pt [2024-07-06 16:02:46,816][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_20000.pt [2024-07-06 16:02:50,999][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_20000.pt (epoch 1 @ 20000 updates, score 75.553) (writing took 7.44634991500061 seconds) [2024-07-06 16:08:58,966][train_inner][INFO] - {"epoch": 1, "update": 0.134, "loss": "1.65", "ntokens": "127.77", "acc_total": "127.77", "n_correct": "92.235", "wer_total": "127.77", "n_error": "35.505", "ppl": "3.14", "accuracy": "72.188", "wer": "27.788", "wps": "8.8", "ups": "0.07", "wpb": "127.8", "bsz": "8", "num_updates": "20200", "lr": "0.00010499", "gnorm": "9.431", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "55483"} [2024-07-06 16:15:07,570][train_inner][INFO] - {"epoch": 1, "update": 0.135, "loss": "1.639", "ntokens": "127.935", "acc_total": "127.935", "n_correct": "92.76", "wer_total": "127.935", "n_error": "35.14", "ppl": "3.11", "accuracy": "72.506", "wer": "27.467", "wps": "69.4", "ups": "0.54", "wpb": "127.9", "bsz": "8", "num_updates": "20400", "lr": "0.00010598", "gnorm": "9.1", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "55852"} [2024-07-06 16:21:15,855][train_inner][INFO] - {"epoch": 1, "update": 0.137, "loss": "1.678", "ntokens": "127.195", "acc_total": "127.195", "n_correct": "91.83", "wer_total": "127.195", "n_error": "35.305", "ppl": "3.2", "accuracy": "72.196", "wer": "27.757", "wps": "69.1", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "20600", "lr": "0.00010697", "gnorm": "9.401", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "56220"} [2024-07-06 16:27:24,128][train_inner][INFO] - {"epoch": 1, "update": 0.138, "loss": "1.626", "ntokens": "127.285", "acc_total": "127.285", "n_correct": "92.285", "wer_total": "127.285", "n_error": "34.935", "ppl": "3.09", "accuracy": "72.503", "wer": "27.446", "wps": "69.1", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "20800", "lr": "0.00010796", "gnorm": "9.042", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "56588"} [2024-07-06 16:33:32,463][train_inner][INFO] - {"epoch": 1, "update": 0.139, "loss": "1.64", "ntokens": "126.71", "acc_total": "126.71", "n_correct": "91.74", "wer_total": "126.71", "n_error": "34.92", "ppl": "3.12", "accuracy": "72.402", "wer": "27.559", "wps": "68.8", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "21000", "lr": "0.00010895", "gnorm": "9.071", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "56957"} [2024-07-06 16:39:40,589][train_inner][INFO] - {"epoch": 1, "update": 0.141, "loss": "1.591", "ntokens": "127.795", "acc_total": "127.795", "n_correct": "93.545", "wer_total": "127.795", "n_error": "34.21", "ppl": "3.01", "accuracy": "73.199", "wer": "26.769", "wps": "69.4", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "21200", "lr": "0.00010994", "gnorm": "8.884", "loss_scale": "4096", "train_wall": "367", "gb_free": "6.5", "wall": "57325"} [2024-07-06 16:42:02,147][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-06 16:45:50,134][train_inner][INFO] - {"epoch": 1, "update": 0.142, "loss": "1.642", "ntokens": "126", "acc_total": "126", "n_correct": "91.61", "wer_total": "126", "n_error": "34.36", "ppl": "3.12", "accuracy": "72.706", "wer": "27.27", "wps": "68.2", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "21400", "lr": "0.00011093", "gnorm": "9.259", "loss_scale": "2048", "train_wall": "369", "gb_free": "6.5", "wall": "57694"} [2024-07-06 16:51:58,040][train_inner][INFO] - {"epoch": 1, "update": 0.143, "loss": "1.608", "ntokens": "127.525", "acc_total": "127.525", "n_correct": "92.815", "wer_total": "127.525", "n_error": "34.625", "ppl": "3.05", "accuracy": "72.782", "wer": "27.152", "wps": "69.3", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "21600", "lr": "0.00011192", "gnorm": "8.906", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "58062"} [2024-07-06 16:58:06,261][train_inner][INFO] - {"epoch": 1, "update": 0.145, "loss": "1.721", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "91.215", "wer_total": "127.03", "n_error": "35.76", "ppl": "3.3", "accuracy": "71.806", "wer": "28.151", "wps": "69", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "21800", "lr": "0.00011291", "gnorm": "9.316", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "58430"} [2024-07-06 17:04:14,592][train_inner][INFO] - {"epoch": 1, "update": 0.146, "loss": "1.63", "ntokens": "126.45", "acc_total": "126.45", "n_correct": "91.61", "wer_total": "126.45", "n_error": "34.805", "ppl": "3.09", "accuracy": "72.448", "wer": "27.525", "wps": "68.7", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "22000", "lr": "0.0001139", "gnorm": "8.82", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "58799"} [2024-07-06 17:10:22,325][train_inner][INFO] - {"epoch": 1, "update": 0.147, "loss": "1.643", "ntokens": "125.925", "acc_total": "125.925", "n_correct": "91.48", "wer_total": "125.925", "n_error": "34.395", "ppl": "3.12", "accuracy": "72.646", "wer": "27.314", "wps": "68.5", "ups": "0.54", "wpb": "125.9", "bsz": "8", "num_updates": "22200", "lr": "0.00011489", "gnorm": "8.905", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "59166"} [2024-07-06 17:16:30,003][train_inner][INFO] - {"epoch": 1, "update": 0.149, "loss": "1.59", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "92.715", "wer_total": "126.865", "n_error": "34.09", "ppl": "3.01", "accuracy": "73.082", "wer": "26.871", "wps": "69", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "22400", "lr": "0.00011588", "gnorm": "8.788", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "59534"} [2024-07-06 17:19:33,910][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 18:01:30,883][valid][INFO] - {"epoch": 1, "valid_loss": "1.322", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.9362", "valid_wer_total": "18.1585", "valid_n_error": "4.21592", "valid_ppl": "2.5", "valid_accuracy": "76.748", "valid_wer": "23.217", "valid_wps": "173.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "22500", "valid_best_accuracy": "76.748"} [2024-07-06 18:01:30,884][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 22500 updates [2024-07-06 18:01:30,884][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_22500.pt [2024-07-06 18:01:34,078][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_22500.pt [2024-07-06 18:01:38,206][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_22500.pt (epoch 1 @ 22500 updates, score 76.748) (writing took 7.322284867987037 seconds) [2024-07-06 18:04:41,542][train_inner][INFO] - {"epoch": 1, "update": 0.15, "loss": "1.559", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "93.1", "wer_total": "126.865", "n_error": "33.73", "ppl": "2.95", "accuracy": "73.385", "wer": "26.587", "wps": "8.8", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "22600", "lr": "0.00011687", "gnorm": "8.894", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "62426"} [2024-07-06 18:10:49,099][train_inner][INFO] - {"epoch": 1, "update": 0.151, "loss": "1.581", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "93.31", "wer_total": "127.465", "n_error": "34.08", "ppl": "2.99", "accuracy": "73.204", "wer": "26.737", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "22800", "lr": "0.00011786", "gnorm": "8.78", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "62793"} [2024-07-06 18:16:56,819][train_inner][INFO] - {"epoch": 1, "update": 0.153, "loss": "1.508", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "93.84", "wer_total": "126.82", "n_error": "32.89", "ppl": "2.84", "accuracy": "73.995", "wer": "25.934", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "23000", "lr": "0.00011885", "gnorm": "8.122", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "63161"} [2024-07-06 18:23:04,175][train_inner][INFO] - {"epoch": 1, "update": 0.154, "loss": "1.623", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "92.445", "wer_total": "127.135", "n_error": "34.65", "ppl": "3.08", "accuracy": "72.714", "wer": "27.254", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "23200", "lr": "0.00011984", "gnorm": "8.717", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "63528"} [2024-07-06 18:29:11,495][train_inner][INFO] - {"epoch": 1, "update": 0.155, "loss": "1.609", "ntokens": "126.895", "acc_total": "126.895", "n_correct": "91.88", "wer_total": "126.895", "n_error": "34.945", "ppl": "3.05", "accuracy": "72.406", "wer": "27.539", "wps": "69.1", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "23400", "lr": "0.00012083", "gnorm": "8.614", "loss_scale": "4096", "train_wall": "367", "gb_free": "6.5", "wall": "63896"} [2024-07-06 18:29:29,806][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-06 18:35:20,960][train_inner][INFO] - {"epoch": 1, "update": 0.156, "loss": "1.566", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "92.435", "wer_total": "126.295", "n_error": "33.825", "ppl": "2.96", "accuracy": "73.19", "wer": "26.783", "wps": "68.4", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "23600", "lr": "0.00012182", "gnorm": "8.492", "loss_scale": "2048", "train_wall": "369", "gb_free": "6.5", "wall": "64265"} [2024-07-06 18:41:28,372][train_inner][INFO] - {"epoch": 1, "update": 0.158, "loss": "1.542", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "94.465", "wer_total": "127.94", "n_error": "33.45", "ppl": "2.91", "accuracy": "73.835", "wer": "26.145", "wps": "69.6", "ups": "0.54", "wpb": "127.9", "bsz": "8", "num_updates": "23800", "lr": "0.00012281", "gnorm": "8.371", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "64632"} [2024-07-06 18:47:36,043][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "1.555", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "92.98", "wer_total": "126.665", "n_error": "33.63", "ppl": "2.94", "accuracy": "73.406", "wer": "26.55", "wps": "68.9", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "24000", "lr": "0.0001238", "gnorm": "8.25", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "65000"} [2024-07-06 18:53:44,094][train_inner][INFO] - {"epoch": 1, "update": 0.16, "loss": "1.5", "ntokens": "127.99", "acc_total": "127.99", "n_correct": "94.29", "wer_total": "127.99", "n_error": "33.645", "ppl": "2.83", "accuracy": "73.67", "wer": "26.287", "wps": "69.6", "ups": "0.54", "wpb": "128", "bsz": "8", "num_updates": "24200", "lr": "0.00012479", "gnorm": "8.248", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "65368"} [2024-07-06 18:59:51,705][train_inner][INFO] - {"epoch": 1, "update": 0.162, "loss": "1.506", "ntokens": "125.52", "acc_total": "125.52", "n_correct": "93.075", "wer_total": "125.52", "n_error": "32.425", "ppl": "2.84", "accuracy": "74.152", "wer": "25.833", "wps": "68.3", "ups": "0.54", "wpb": "125.5", "bsz": "8", "num_updates": "24400", "lr": "0.00012578", "gnorm": "8.055", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "65736"} [2024-07-06 19:05:59,040][train_inner][INFO] - {"epoch": 1, "update": 0.163, "loss": "1.541", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "93.255", "wer_total": "126.265", "n_error": "32.98", "ppl": "2.91", "accuracy": "73.857", "wer": "26.12", "wps": "68.7", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "24600", "lr": "0.00012677", "gnorm": "7.822", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "66103"} [2024-07-06 19:12:06,608][train_inner][INFO] - {"epoch": 1, "update": 0.164, "loss": "1.492", "ntokens": "127.12", "acc_total": "127.12", "n_correct": "93.96", "wer_total": "127.12", "n_error": "33.135", "ppl": "2.81", "accuracy": "73.914", "wer": "26.066", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "24800", "lr": "0.00012776", "gnorm": "8.074", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "66471"} [2024-07-06 19:17:20,908][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-06 19:18:16,047][train_inner][INFO] - {"epoch": 1, "update": 0.166, "loss": "1.57", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "93.005", "wer_total": "126.59", "n_error": "33.55", "ppl": "2.97", "accuracy": "73.469", "wer": "26.503", "wps": "68.5", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "25000", "lr": "0.00012875", "gnorm": "8.119", "loss_scale": "1024", "train_wall": "369", "gb_free": "6.5", "wall": "66840"} [2024-07-06 19:18:16,048][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 20:00:11,495][valid][INFO] - {"epoch": 1, "valid_loss": "1.256", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.0548", "valid_wer_total": "18.1585", "valid_n_error": "4.099", "valid_ppl": "2.39", "valid_accuracy": "77.401", "valid_wer": "22.573", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "25000", "valid_best_accuracy": "77.401"} [2024-07-06 20:00:11,496][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 25000 updates [2024-07-06 20:00:11,496][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_25000.pt [2024-07-06 20:00:14,640][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_25000.pt [2024-07-06 20:00:18,804][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_25000.pt (epoch 1 @ 25000 updates, score 77.401) (writing took 7.308300702017732 seconds) [2024-07-06 20:06:26,412][train_inner][INFO] - {"epoch": 1, "update": 0.167, "loss": "1.529", "ntokens": "126.05", "acc_total": "126.05", "n_correct": "93.2", "wer_total": "126.05", "n_error": "32.81", "ppl": "2.89", "accuracy": "73.939", "wer": "26.029", "wps": "8.7", "ups": "0.07", "wpb": "126", "bsz": "8", "num_updates": "25200", "lr": "0.00012974", "gnorm": "8.022", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "69730"} [2024-07-06 20:12:34,709][train_inner][INFO] - {"epoch": 1, "update": 0.168, "loss": "1.506", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "93.31", "wer_total": "126.53", "n_error": "33.195", "ppl": "2.84", "accuracy": "73.745", "wer": "26.235", "wps": "68.7", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "25400", "lr": "0.00013073", "gnorm": "8.263", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "70099"} [2024-07-06 20:18:42,897][train_inner][INFO] - {"epoch": 1, "update": 0.17, "loss": "1.52", "ntokens": "126.755", "acc_total": "126.755", "n_correct": "93.41", "wer_total": "126.755", "n_error": "33.315", "ppl": "2.87", "accuracy": "73.693", "wer": "26.283", "wps": "68.9", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "25600", "lr": "0.00013172", "gnorm": "8.153", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "70467"} [2024-07-06 20:24:50,765][train_inner][INFO] - {"epoch": 1, "update": 0.171, "loss": "1.506", "ntokens": "126.035", "acc_total": "126.035", "n_correct": "92.815", "wer_total": "126.035", "n_error": "33.195", "ppl": "2.84", "accuracy": "73.642", "wer": "26.338", "wps": "68.5", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "25800", "lr": "0.00013271", "gnorm": "7.923", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "70835"} [2024-07-06 20:30:58,440][train_inner][INFO] - {"epoch": 1, "update": 0.172, "loss": "1.486", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "94.675", "wer_total": "127.49", "n_error": "32.79", "ppl": "2.8", "accuracy": "74.261", "wer": "25.72", "wps": "69.3", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "26000", "lr": "0.0001337", "gnorm": "7.797", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "71202"} [2024-07-06 20:37:05,797][train_inner][INFO] - {"epoch": 1, "update": 0.174, "loss": "1.468", "ntokens": "126.73", "acc_total": "126.73", "n_correct": "94.725", "wer_total": "126.73", "n_error": "31.985", "ppl": "2.77", "accuracy": "74.746", "wer": "25.239", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "26200", "lr": "0.00013469", "gnorm": "7.828", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "71570"} [2024-07-06 20:43:12,898][train_inner][INFO] - {"epoch": 1, "update": 0.175, "loss": "1.496", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "93.845", "wer_total": "127.245", "n_error": "33.36", "ppl": "2.82", "accuracy": "73.751", "wer": "26.217", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "26400", "lr": "0.00013568", "gnorm": "7.691", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "71937"} [2024-07-06 20:49:20,227][train_inner][INFO] - {"epoch": 1, "update": 0.176, "loss": "1.498", "ntokens": "126.3", "acc_total": "126.3", "n_correct": "93.045", "wer_total": "126.3", "n_error": "33.22", "ppl": "2.83", "accuracy": "73.67", "wer": "26.302", "wps": "68.8", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "26600", "lr": "0.00013667", "gnorm": "7.511", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "72304"} [2024-07-06 20:55:27,941][train_inner][INFO] - {"epoch": 1, "update": 0.178, "loss": "1.458", "ntokens": "127.37", "acc_total": "127.37", "n_correct": "94.655", "wer_total": "127.37", "n_error": "32.67", "ppl": "2.75", "accuracy": "74.315", "wer": "25.65", "wps": "69.3", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "26800", "lr": "0.00013766", "gnorm": "7.621", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "72672"} [2024-07-06 21:01:35,220][train_inner][INFO] - {"epoch": 1, "update": 0.179, "loss": "1.489", "ntokens": "126.425", "acc_total": "126.425", "n_correct": "93.73", "wer_total": "126.425", "n_error": "32.63", "ppl": "2.81", "accuracy": "74.139", "wer": "25.81", "wps": "68.8", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "27000", "lr": "0.00013865", "gnorm": "7.25", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "73039"} [2024-07-06 21:07:42,214][train_inner][INFO] - {"epoch": 1, "update": 0.18, "loss": "1.471", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "94.59", "wer_total": "127.32", "n_error": "32.705", "ppl": "2.77", "accuracy": "74.293", "wer": "25.687", "wps": "69.4", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "27200", "lr": "0.00013964", "gnorm": "7.508", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "73406"} [2024-07-06 21:13:49,534][train_inner][INFO] - {"epoch": 1, "update": 0.182, "loss": "1.436", "ntokens": "126.595", "acc_total": "126.595", "n_correct": "94.745", "wer_total": "126.595", "n_error": "31.81", "ppl": "2.71", "accuracy": "74.841", "wer": "25.127", "wps": "68.9", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "27400", "lr": "0.00014063", "gnorm": "7.257", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "73774"} [2024-07-06 21:16:53,257][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 21:58:49,038][valid][INFO] - {"epoch": 1, "valid_loss": "1.215", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.116", "valid_wer_total": "18.1585", "valid_n_error": "4.0391", "valid_ppl": "2.32", "valid_accuracy": "77.738", "valid_wer": "22.244", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "27500", "valid_best_accuracy": "77.738"} [2024-07-06 21:58:49,038][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 27500 updates [2024-07-06 21:58:49,039][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_27500.pt [2024-07-06 21:58:52,215][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_27500.pt [2024-07-06 21:58:56,307][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_27500.pt (epoch 1 @ 27500 updates, score 77.738) (writing took 7.268263339006808 seconds) [2024-07-06 22:01:59,785][train_inner][INFO] - {"epoch": 1, "update": 0.183, "loss": "1.46", "ntokens": "126.8", "acc_total": "126.8", "n_correct": "94.33", "wer_total": "126.8", "n_error": "32.43", "ppl": "2.75", "accuracy": "74.393", "wer": "25.576", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "27600", "lr": "0.00014162", "gnorm": "7.176", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "76664"} [2024-07-06 22:08:07,194][train_inner][INFO] - {"epoch": 1, "update": 0.184, "loss": "1.455", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "94.43", "wer_total": "126.805", "n_error": "32.325", "ppl": "2.74", "accuracy": "74.469", "wer": "25.492", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "27800", "lr": "0.00014261", "gnorm": "7.379", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "77031"} [2024-07-06 22:14:15,053][train_inner][INFO] - {"epoch": 1, "update": 0.186, "loss": "1.416", "ntokens": "127.6", "acc_total": "127.6", "n_correct": "95.325", "wer_total": "127.6", "n_error": "32.25", "ppl": "2.67", "accuracy": "74.706", "wer": "25.274", "wps": "69.4", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "28000", "lr": "0.0001436", "gnorm": "7.312", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "77399"} [2024-07-06 22:20:22,373][train_inner][INFO] - {"epoch": 1, "update": 0.187, "loss": "1.405", "ntokens": "128.14", "acc_total": "128.14", "n_correct": "96.3", "wer_total": "128.14", "n_error": "31.805", "ppl": "2.65", "accuracy": "75.152", "wer": "24.821", "wps": "69.8", "ups": "0.54", "wpb": "128.1", "bsz": "8", "num_updates": "28200", "lr": "0.00014459", "gnorm": "7.375", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "77766"} [2024-07-06 22:26:29,546][train_inner][INFO] - {"epoch": 1, "update": 0.188, "loss": "1.441", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "95.285", "wer_total": "127.375", "n_error": "32.04", "ppl": "2.72", "accuracy": "74.807", "wer": "25.154", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "28400", "lr": "0.00014558", "gnorm": "7.3", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "78134"} [2024-07-06 22:32:36,999][train_inner][INFO] - {"epoch": 1, "update": 0.19, "loss": "1.382", "ntokens": "127.475", "acc_total": "127.475", "n_correct": "96.365", "wer_total": "127.475", "n_error": "31.04", "ppl": "2.61", "accuracy": "75.595", "wer": "24.35", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "28600", "lr": "0.00014657", "gnorm": "7.096", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "78501"} [2024-07-06 22:38:44,760][train_inner][INFO] - {"epoch": 1, "update": 0.191, "loss": "1.42", "ntokens": "128.445", "acc_total": "128.445", "n_correct": "96.55", "wer_total": "128.445", "n_error": "31.87", "ppl": "2.68", "accuracy": "75.168", "wer": "24.812", "wps": "69.9", "ups": "0.54", "wpb": "128.4", "bsz": "8", "num_updates": "28800", "lr": "0.00014756", "gnorm": "6.742", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "78869"} [2024-07-06 22:44:52,761][train_inner][INFO] - {"epoch": 1, "update": 0.192, "loss": "1.41", "ntokens": "126.415", "acc_total": "126.415", "n_correct": "94.98", "wer_total": "126.415", "n_error": "31.405", "ppl": "2.66", "accuracy": "75.133", "wer": "24.843", "wps": "68.7", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "29000", "lr": "0.00014855", "gnorm": "6.901", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "79237"} [2024-07-06 22:47:28,646][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-06 22:51:01,957][train_inner][INFO] - {"epoch": 1, "update": 0.194, "loss": "1.46", "ntokens": "126.285", "acc_total": "126.285", "n_correct": "94.575", "wer_total": "126.285", "n_error": "31.655", "ppl": "2.75", "accuracy": "74.89", "wer": "25.066", "wps": "68.4", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "29200", "lr": "0.00014954", "gnorm": "7.079", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "79606"} [2024-07-06 22:57:09,602][train_inner][INFO] - {"epoch": 1, "update": 0.195, "loss": "1.412", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "95.605", "wer_total": "127.485", "n_error": "31.845", "ppl": "2.66", "accuracy": "74.993", "wer": "24.979", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "29400", "lr": "0.00015053", "gnorm": "6.857", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "79974"} [2024-07-06 23:03:17,127][train_inner][INFO] - {"epoch": 1, "update": 0.196, "loss": "1.443", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "94.76", "wer_total": "127.055", "n_error": "32.27", "ppl": "2.72", "accuracy": "74.582", "wer": "25.398", "wps": "69.1", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "29600", "lr": "0.00015152", "gnorm": "7.055", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "80341"} [2024-07-06 23:09:24,666][train_inner][INFO] - {"epoch": 1, "update": 0.198, "loss": "1.467", "ntokens": "126.44", "acc_total": "126.44", "n_correct": "93.53", "wer_total": "126.44", "n_error": "32.88", "ppl": "2.77", "accuracy": "73.972", "wer": "26.004", "wps": "68.8", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "29800", "lr": "0.00015251", "gnorm": "7.07", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "80709"} [2024-07-06 23:15:32,157][train_inner][INFO] - {"epoch": 1, "update": 0.199, "loss": "1.393", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "95.435", "wer_total": "126.58", "n_error": "31.075", "ppl": "2.63", "accuracy": "75.395", "wer": "24.55", "wps": "68.9", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "30000", "lr": "0.0001535", "gnorm": "6.783", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "81076"} [2024-07-06 23:15:32,157][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-06 23:57:27,909][valid][INFO] - {"epoch": 1, "valid_loss": "1.19", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.1826", "valid_wer_total": "18.1585", "valid_n_error": "3.97374", "valid_ppl": "2.28", "valid_accuracy": "78.104", "valid_wer": "21.884", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "78.104"} [2024-07-06 23:57:27,909][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 30000 updates [2024-07-06 23:57:27,910][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_30000.pt [2024-07-06 23:57:31,103][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_30000.pt [2024-07-06 23:57:35,281][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_30000.pt (epoch 1 @ 30000 updates, score 78.104) (writing took 7.37203776097158 seconds) [2024-07-07 00:03:42,564][train_inner][INFO] - {"epoch": 1, "update": 0.2, "loss": "1.416", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "95.31", "wer_total": "127.06", "n_error": "31.73", "ppl": "2.67", "accuracy": "75.012", "wer": "24.972", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "30200", "lr": "0.00015449", "gnorm": "6.608", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "83967"} [2024-07-07 00:09:50,120][train_inner][INFO] - {"epoch": 1, "update": 0.202, "loss": "1.459", "ntokens": "125.765", "acc_total": "125.765", "n_correct": "94.205", "wer_total": "125.765", "n_error": "31.52", "ppl": "2.75", "accuracy": "74.906", "wer": "25.063", "wps": "68.4", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "30400", "lr": "0.00015548", "gnorm": "6.981", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "84334"} [2024-07-07 00:15:57,815][train_inner][INFO] - {"epoch": 1, "update": 0.203, "loss": "1.437", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "95.14", "wer_total": "127.335", "n_error": "32.15", "ppl": "2.71", "accuracy": "74.716", "wer": "25.248", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "30600", "lr": "0.00015647", "gnorm": "6.923", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "84702"} [2024-07-07 00:22:05,510][train_inner][INFO] - {"epoch": 1, "update": 0.204, "loss": "1.463", "ntokens": "126.715", "acc_total": "126.715", "n_correct": "94.605", "wer_total": "126.715", "n_error": "32.1", "ppl": "2.76", "accuracy": "74.66", "wer": "25.332", "wps": "68.9", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "30800", "lr": "0.00015746", "gnorm": "6.558", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "85070"} [2024-07-07 00:28:12,827][train_inner][INFO] - {"epoch": 1, "update": 0.206, "loss": "1.402", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "95.595", "wer_total": "127.185", "n_error": "31.575", "ppl": "2.64", "accuracy": "75.162", "wer": "24.826", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "31000", "lr": "0.00015845", "gnorm": "6.638", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "85437"} [2024-07-07 00:34:20,250][train_inner][INFO] - {"epoch": 1, "update": 0.207, "loss": "1.388", "ntokens": "126.06", "acc_total": "126.06", "n_correct": "95.13", "wer_total": "126.06", "n_error": "30.905", "ppl": "2.62", "accuracy": "75.464", "wer": "24.516", "wps": "68.6", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "31200", "lr": "0.00015944", "gnorm": "6.499", "loss_scale": "4096", "train_wall": "367", "gb_free": "6.5", "wall": "85804"} [2024-07-07 00:36:12,255][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 00:40:29,932][train_inner][INFO] - {"epoch": 1, "update": 0.208, "loss": "1.404", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "95.135", "wer_total": "126.87", "n_error": "31.7", "ppl": "2.65", "accuracy": "74.986", "wer": "24.986", "wps": "68.6", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "31400", "lr": "0.00016043", "gnorm": "6.79", "loss_scale": "2048", "train_wall": "369", "gb_free": "6.5", "wall": "86174"} [2024-07-07 00:46:37,492][train_inner][INFO] - {"epoch": 1, "update": 0.21, "loss": "1.448", "ntokens": "127.325", "acc_total": "127.325", "n_correct": "94.71", "wer_total": "127.325", "n_error": "32.59", "ppl": "2.73", "accuracy": "74.384", "wer": "25.596", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "31600", "lr": "0.00016142", "gnorm": "6.621", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "86542"} [2024-07-07 00:52:44,964][train_inner][INFO] - {"epoch": 1, "update": 0.211, "loss": "1.336", "ntokens": "126.41", "acc_total": "126.41", "n_correct": "95.59", "wer_total": "126.41", "n_error": "30.805", "ppl": "2.52", "accuracy": "75.619", "wer": "24.369", "wps": "68.8", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "31800", "lr": "0.00016241", "gnorm": "6.265", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "86909"} [2024-07-07 00:58:52,261][train_inner][INFO] - {"epoch": 1, "update": 0.212, "loss": "1.362", "ntokens": "126.415", "acc_total": "126.415", "n_correct": "95.62", "wer_total": "126.415", "n_error": "30.75", "ppl": "2.57", "accuracy": "75.64", "wer": "24.325", "wps": "68.8", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "32000", "lr": "0.0001634", "gnorm": "6.488", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "87276"} [2024-07-07 01:04:59,393][train_inner][INFO] - {"epoch": 1, "update": 0.214, "loss": "1.425", "ntokens": "126.75", "acc_total": "126.75", "n_correct": "95.09", "wer_total": "126.75", "n_error": "31.615", "ppl": "2.69", "accuracy": "75.022", "wer": "24.943", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "32200", "lr": "0.00016439", "gnorm": "6.443", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "87643"} [2024-07-07 01:11:06,932][train_inner][INFO] - {"epoch": 1, "update": 0.215, "loss": "1.404", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "94.91", "wer_total": "126.53", "n_error": "31.56", "ppl": "2.65", "accuracy": "75.01", "wer": "24.943", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "32400", "lr": "0.00016538", "gnorm": "6.436", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "88011"} [2024-07-07 01:14:10,825][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 01:56:03,029][valid][INFO] - {"epoch": 1, "valid_loss": "1.131", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.3587", "valid_wer_total": "18.1585", "valid_n_error": "3.79471", "valid_ppl": "2.19", "valid_accuracy": "79.074", "valid_wer": "20.898", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "32500", "valid_best_accuracy": "79.074"} [2024-07-07 01:56:03,030][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 32500 updates [2024-07-07 01:56:03,030][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_32500.pt [2024-07-07 01:56:06,235][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_32500.pt [2024-07-07 01:56:10,340][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_32500.pt (epoch 1 @ 32500 updates, score 79.074) (writing took 7.309856694017071 seconds) [2024-07-07 01:59:13,660][train_inner][INFO] - {"epoch": 1, "update": 0.216, "loss": "1.355", "ntokens": "127.16", "acc_total": "127.16", "n_correct": "96.235", "wer_total": "127.16", "n_error": "30.9", "ppl": "2.56", "accuracy": "75.68", "wer": "24.3", "wps": "8.8", "ups": "0.07", "wpb": "127.2", "bsz": "8", "num_updates": "32600", "lr": "0.00016637", "gnorm": "6.183", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "90898"} [2024-07-07 02:05:20,939][train_inner][INFO] - {"epoch": 1, "update": 0.217, "loss": "1.431", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "95.95", "wer_total": "127.135", "n_error": "31.16", "ppl": "2.7", "accuracy": "75.471", "wer": "24.509", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "32800", "lr": "0.00016736", "gnorm": "6.357", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "91265"} [2024-07-07 02:11:27,709][train_inner][INFO] - {"epoch": 1, "update": 0.219, "loss": "1.367", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "96.04", "wer_total": "126.675", "n_error": "30.6", "ppl": "2.58", "accuracy": "75.816", "wer": "24.156", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "33000", "lr": "0.00016835", "gnorm": "6.36", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "91632"} [2024-07-07 02:17:34,662][train_inner][INFO] - {"epoch": 1, "update": 0.22, "loss": "1.343", "ntokens": "127.285", "acc_total": "127.285", "n_correct": "97.59", "wer_total": "127.285", "n_error": "29.67", "ppl": "2.54", "accuracy": "76.67", "wer": "23.31", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "33200", "lr": "0.00016934", "gnorm": "6.451", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "91999"} [2024-07-07 02:23:42,411][train_inner][INFO] - {"epoch": 1, "update": 0.221, "loss": "1.428", "ntokens": "128.055", "acc_total": "128.055", "n_correct": "97.1", "wer_total": "128.055", "n_error": "30.89", "ppl": "2.69", "accuracy": "75.827", "wer": "24.122", "wps": "69.6", "ups": "0.54", "wpb": "128.1", "bsz": "8", "num_updates": "33400", "lr": "0.00017033", "gnorm": "6.271", "loss_scale": "4096", "train_wall": "367", "gb_free": "6.5", "wall": "92366"} [2024-07-07 02:29:49,153][train_inner][INFO] - {"epoch": 1, "update": 0.223, "loss": "1.398", "ntokens": "125.905", "acc_total": "125.905", "n_correct": "96.155", "wer_total": "125.905", "n_error": "29.735", "ppl": "2.64", "accuracy": "76.371", "wer": "23.617", "wps": "68.7", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "33600", "lr": "0.00017132", "gnorm": "6.331", "loss_scale": "4096", "train_wall": "366", "gb_free": "6.5", "wall": "92733"} [2024-07-07 02:35:56,437][train_inner][INFO] - {"epoch": 1, "update": 0.224, "loss": "1.363", "ntokens": "126.19", "acc_total": "126.19", "n_correct": "97.795", "wer_total": "126.19", "n_error": "28.375", "ppl": "2.57", "accuracy": "77.498", "wer": "22.486", "wps": "68.7", "ups": "0.54", "wpb": "126.2", "bsz": "8", "num_updates": "33800", "lr": "0.00017231", "gnorm": "6.015", "loss_scale": "4096", "train_wall": "367", "gb_free": "6.5", "wall": "93100"} [2024-07-07 02:36:29,358][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 02:42:04,970][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "1.365", "ntokens": "126.445", "acc_total": "126.445", "n_correct": "96.08", "wer_total": "126.445", "n_error": "30.325", "ppl": "2.58", "accuracy": "75.986", "wer": "23.983", "wps": "68.6", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "34000", "lr": "0.0001733", "gnorm": "6.363", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "93469"} [2024-07-07 02:48:11,626][train_inner][INFO] - {"epoch": 1, "update": 0.227, "loss": "1.383", "ntokens": "126.62", "acc_total": "126.62", "n_correct": "96.13", "wer_total": "126.62", "n_error": "30.465", "ppl": "2.61", "accuracy": "75.92", "wer": "24.06", "wps": "69.1", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "34200", "lr": "0.00017429", "gnorm": "6.236", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "93836"} [2024-07-07 02:54:18,504][train_inner][INFO] - {"epoch": 1, "update": 0.228, "loss": "1.326", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "98.255", "wer_total": "126.675", "n_error": "28.385", "ppl": "2.51", "accuracy": "77.565", "wer": "22.408", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "34400", "lr": "0.00017528", "gnorm": "6.097", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "94203"} [2024-07-07 03:00:25,454][train_inner][INFO] - {"epoch": 1, "update": 0.229, "loss": "1.383", "ntokens": "127.77", "acc_total": "127.77", "n_correct": "97.64", "wer_total": "127.77", "n_error": "30.11", "ppl": "2.61", "accuracy": "76.419", "wer": "23.566", "wps": "69.6", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "34600", "lr": "0.00017627", "gnorm": "6.223", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "94570"} [2024-07-07 03:06:32,698][train_inner][INFO] - {"epoch": 1, "update": 0.231, "loss": "1.378", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "97.465", "wer_total": "126.905", "n_error": "29.435", "ppl": "2.6", "accuracy": "76.802", "wer": "23.195", "wps": "69.1", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "34800", "lr": "0.00017726", "gnorm": "6.146", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "94937"} [2024-07-07 03:12:39,836][train_inner][INFO] - {"epoch": 1, "update": 0.232, "loss": "1.341", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "98.085", "wer_total": "127.075", "n_error": "28.94", "ppl": "2.53", "accuracy": "77.187", "wer": "22.774", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "35000", "lr": "0.00017825", "gnorm": "5.907", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "95304"} [2024-07-07 03:12:39,837][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 03:54:34,892][valid][INFO] - {"epoch": 1, "valid_loss": "1.115", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.4005", "valid_wer_total": "18.1585", "valid_n_error": "3.75519", "valid_ppl": "2.17", "valid_accuracy": "79.304", "valid_wer": "20.68", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "35000", "valid_best_accuracy": "79.304"} [2024-07-07 03:54:34,893][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 35000 updates [2024-07-07 03:54:34,893][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_35000.pt [2024-07-07 03:54:38,068][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_35000.pt [2024-07-07 03:54:42,239][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_35000.pt (epoch 1 @ 35000 updates, score 79.304) (writing took 7.346844801970292 seconds) [2024-07-07 04:00:49,419][train_inner][INFO] - {"epoch": 1, "update": 0.233, "loss": "1.342", "ntokens": "126.7", "acc_total": "126.7", "n_correct": "96.31", "wer_total": "126.7", "n_error": "30.36", "ppl": "2.54", "accuracy": "76.014", "wer": "23.962", "wps": "8.8", "ups": "0.07", "wpb": "126.7", "bsz": "8", "num_updates": "35200", "lr": "0.00017924", "gnorm": "5.786", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "98193"} [2024-07-07 04:06:56,231][train_inner][INFO] - {"epoch": 1, "update": 0.235, "loss": "1.323", "ntokens": "125.835", "acc_total": "125.835", "n_correct": "97.065", "wer_total": "125.835", "n_error": "28.735", "ppl": "2.5", "accuracy": "77.137", "wer": "22.835", "wps": "68.6", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "35400", "lr": "0.00018023", "gnorm": "5.901", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "98560"} [2024-07-07 04:13:03,276][train_inner][INFO] - {"epoch": 1, "update": 0.236, "loss": "1.333", "ntokens": "125.99", "acc_total": "125.99", "n_correct": "96.445", "wer_total": "125.99", "n_error": "29.52", "ppl": "2.52", "accuracy": "76.55", "wer": "23.43", "wps": "68.7", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "35600", "lr": "0.00018122", "gnorm": "5.896", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "98927"} [2024-07-07 04:19:10,269][train_inner][INFO] - {"epoch": 1, "update": 0.237, "loss": "1.343", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "97.545", "wer_total": "126.815", "n_error": "29.255", "ppl": "2.54", "accuracy": "76.919", "wer": "23.069", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "35800", "lr": "0.00018221", "gnorm": "5.773", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "99294"} [2024-07-07 04:25:18,274][train_inner][INFO] - {"epoch": 1, "update": 0.239, "loss": "1.352", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "96.745", "wer_total": "127.425", "n_error": "30.635", "ppl": "2.55", "accuracy": "75.923", "wer": "24.042", "wps": "69.3", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "36000", "lr": "0.0001832", "gnorm": "6.102", "loss_scale": "4096", "train_wall": "366", "gb_free": "6.5", "wall": "99662"} [2024-07-07 04:26:31,687][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 04:31:26,839][train_inner][INFO] - {"epoch": 1, "update": 0.24, "loss": "1.28", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "98.035", "wer_total": "127.42", "n_error": "29.36", "ppl": "2.43", "accuracy": "76.938", "wer": "23.042", "wps": "69.1", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "36200", "lr": "0.00018419", "gnorm": "5.866", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "100031"} [2024-07-07 04:37:33,777][train_inner][INFO] - {"epoch": 1, "update": 0.241, "loss": "1.363", "ntokens": "125.91", "acc_total": "125.91", "n_correct": "97.35", "wer_total": "125.91", "n_error": "28.535", "ppl": "2.57", "accuracy": "77.317", "wer": "22.663", "wps": "68.6", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "36400", "lr": "0.00018518", "gnorm": "5.831", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "100398"} [2024-07-07 04:43:40,592][train_inner][INFO] - {"epoch": 1, "update": 0.243, "loss": "1.405", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "96.785", "wer_total": "127.17", "n_error": "30.35", "ppl": "2.65", "accuracy": "76.107", "wer": "23.866", "wps": "69.3", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "36600", "lr": "0.00018617", "gnorm": "5.936", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "100765"} [2024-07-07 04:49:47,300][train_inner][INFO] - {"epoch": 1, "update": 0.244, "loss": "1.382", "ntokens": "126.43", "acc_total": "126.43", "n_correct": "96.54", "wer_total": "126.43", "n_error": "29.865", "ppl": "2.61", "accuracy": "76.358", "wer": "23.622", "wps": "69", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "36800", "lr": "0.00018716", "gnorm": "6.185", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "101131"} [2024-07-07 04:55:54,226][train_inner][INFO] - {"epoch": 1, "update": 0.245, "loss": "1.33", "ntokens": "126.66", "acc_total": "126.66", "n_correct": "96.665", "wer_total": "126.66", "n_error": "29.955", "ppl": "2.51", "accuracy": "76.318", "wer": "23.65", "wps": "69", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "37000", "lr": "0.00018815", "gnorm": "5.746", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "101498"} [2024-07-07 05:02:00,887][train_inner][INFO] - {"epoch": 1, "update": 0.247, "loss": "1.319", "ntokens": "126.125", "acc_total": "126.125", "n_correct": "96.385", "wer_total": "126.125", "n_error": "29.725", "ppl": "2.49", "accuracy": "76.42", "wer": "23.568", "wps": "68.8", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "37200", "lr": "0.00018914", "gnorm": "5.927", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "101865"} [2024-07-07 05:08:08,124][train_inner][INFO] - {"epoch": 1, "update": 0.248, "loss": "1.342", "ntokens": "127.81", "acc_total": "127.81", "n_correct": "98.21", "wer_total": "127.81", "n_error": "29.57", "ppl": "2.54", "accuracy": "76.841", "wer": "23.136", "wps": "69.6", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "37400", "lr": "0.00019013", "gnorm": "5.602", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "102232"} [2024-07-07 05:11:11,653][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 05:53:03,594][valid][INFO] - {"epoch": 1, "valid_loss": "1.095", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.8039", "valid_wer_total": "18.1585", "valid_n_error": "3.35244", "valid_ppl": "2.14", "valid_accuracy": "81.526", "valid_wer": "18.462", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "37500", "valid_best_accuracy": "81.526"} [2024-07-07 05:53:03,594][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 37500 updates [2024-07-07 05:53:03,595][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_37500.pt [2024-07-07 05:53:06,843][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_37500.pt [2024-07-07 05:53:10,973][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_37500.pt (epoch 1 @ 37500 updates, score 81.526) (writing took 7.378277765004896 seconds) [2024-07-07 05:56:14,073][train_inner][INFO] - {"epoch": 1, "update": 0.249, "loss": "1.303", "ntokens": "126.685", "acc_total": "126.685", "n_correct": "99.385", "wer_total": "126.685", "n_error": "27.26", "ppl": "2.47", "accuracy": "78.45", "wer": "21.518", "wps": "8.8", "ups": "0.07", "wpb": "126.7", "bsz": "8", "num_updates": "37600", "lr": "0.00019112", "gnorm": "5.427", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "105118"} [2024-07-07 06:02:21,496][train_inner][INFO] - {"epoch": 1, "update": 0.251, "loss": "1.341", "ntokens": "128.245", "acc_total": "128.245", "n_correct": "100.465", "wer_total": "128.245", "n_error": "27.77", "ppl": "2.53", "accuracy": "78.338", "wer": "21.654", "wps": "69.8", "ups": "0.54", "wpb": "128.2", "bsz": "8", "num_updates": "37800", "lr": "0.00019211", "gnorm": "5.602", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "105486"} [2024-07-07 06:08:28,553][train_inner][INFO] - {"epoch": 1, "update": 0.252, "loss": "1.321", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "96.45", "wer_total": "126.625", "n_error": "30.155", "ppl": "2.5", "accuracy": "76.17", "wer": "23.814", "wps": "69", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "38000", "lr": "0.0001931", "gnorm": "5.684", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "105853"} [2024-07-07 06:14:24,446][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 06:14:37,300][train_inner][INFO] - {"epoch": 1, "update": 0.253, "loss": "1.321", "ntokens": "125.475", "acc_total": "125.475", "n_correct": "95.98", "wer_total": "125.475", "n_error": "29.48", "ppl": "2.5", "accuracy": "76.493", "wer": "23.495", "wps": "68.1", "ups": "0.54", "wpb": "125.5", "bsz": "8", "num_updates": "38200", "lr": "0.00019409", "gnorm": "5.464", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "106221"} [2024-07-07 06:20:43,952][train_inner][INFO] - {"epoch": 1, "update": 0.255, "loss": "1.323", "ntokens": "126.47", "acc_total": "126.47", "n_correct": "97.6", "wer_total": "126.47", "n_error": "28.855", "ppl": "2.5", "accuracy": "77.172", "wer": "22.816", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "38400", "lr": "0.00019508", "gnorm": "5.48", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "106588"} [2024-07-07 06:26:50,588][train_inner][INFO] - {"epoch": 1, "update": 0.256, "loss": "1.341", "ntokens": "126.32", "acc_total": "126.32", "n_correct": "97.51", "wer_total": "126.32", "n_error": "28.775", "ppl": "2.53", "accuracy": "77.193", "wer": "22.779", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "38600", "lr": "0.00019607", "gnorm": "5.249", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "106955"} [2024-07-07 06:32:57,296][train_inner][INFO] - {"epoch": 1, "update": 0.257, "loss": "1.275", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "98.155", "wer_total": "126.555", "n_error": "28.39", "ppl": "2.42", "accuracy": "77.559", "wer": "22.433", "wps": "69", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "38800", "lr": "0.00019706", "gnorm": "5.693", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "107321"} [2024-07-07 06:39:04,312][train_inner][INFO] - {"epoch": 1, "update": 0.259, "loss": "1.322", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "96.715", "wer_total": "126.875", "n_error": "30.13", "ppl": "2.5", "accuracy": "76.229", "wer": "23.748", "wps": "69.1", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "39000", "lr": "0.00019805", "gnorm": "5.423", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "107688"} [2024-07-07 06:45:11,108][train_inner][INFO] - {"epoch": 1, "update": 0.26, "loss": "1.335", "ntokens": "126.335", "acc_total": "126.335", "n_correct": "96.025", "wer_total": "126.335", "n_error": "30.29", "ppl": "2.52", "accuracy": "76.008", "wer": "23.976", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "39200", "lr": "0.00019904", "gnorm": "5.735", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "108055"} [2024-07-07 06:51:17,599][train_inner][INFO] - {"epoch": 1, "update": 0.261, "loss": "1.296", "ntokens": "124.18", "acc_total": "124.18", "n_correct": "94.625", "wer_total": "124.18", "n_error": "29.515", "ppl": "2.46", "accuracy": "76.2", "wer": "23.768", "wps": "67.8", "ups": "0.55", "wpb": "124.2", "bsz": "8", "num_updates": "39400", "lr": "0.00020003", "gnorm": "5.237", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "108422"} [2024-07-07 06:57:25,871][train_inner][INFO] - {"epoch": 1, "update": 0.263, "loss": "1.359", "ntokens": "126.55", "acc_total": "126.55", "n_correct": "95.93", "wer_total": "126.55", "n_error": "30.615", "ppl": "2.57", "accuracy": "75.804", "wer": "24.192", "wps": "68.7", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "39600", "lr": "0.00020102", "gnorm": "5.446", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "108790"} [2024-07-07 07:01:55,176][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-07 07:03:34,299][train_inner][INFO] - {"epoch": 1, "update": 0.264, "loss": "1.314", "ntokens": "126.495", "acc_total": "126.495", "n_correct": "96.33", "wer_total": "126.495", "n_error": "30.115", "ppl": "2.49", "accuracy": "76.153", "wer": "23.807", "wps": "68.7", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "39800", "lr": "0.00020201", "gnorm": "5.259", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "109158"} [2024-07-07 07:09:41,019][train_inner][INFO] - {"epoch": 1, "update": 0.265, "loss": "1.326", "ntokens": "126.8", "acc_total": "126.8", "n_correct": "96.735", "wer_total": "126.8", "n_error": "30.035", "ppl": "2.51", "accuracy": "76.289", "wer": "23.687", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "40000", "lr": "0.000203", "gnorm": "5.551", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "109525"} [2024-07-07 07:09:41,020][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 07:51:34,515][valid][INFO] - {"epoch": 1, "valid_loss": "1.092", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.3897", "valid_wer_total": "18.1585", "valid_n_error": "3.76607", "valid_ppl": "2.13", "valid_accuracy": "79.245", "valid_wer": "20.74", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "40000", "valid_best_accuracy": "81.526"} [2024-07-07 07:51:34,516][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 40000 updates [2024-07-07 07:51:34,516][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_40000.pt [2024-07-07 07:51:37,654][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_40000.pt [2024-07-07 07:51:39,793][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_40000.pt (epoch 1 @ 40000 updates, score 79.245) (writing took 5.277235171000939 seconds) [2024-07-07 07:57:46,991][train_inner][INFO] - {"epoch": 1, "update": 0.267, "loss": "1.283", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "97.22", "wer_total": "127.025", "n_error": "29.785", "ppl": "2.43", "accuracy": "76.536", "wer": "23.448", "wps": "8.8", "ups": "0.07", "wpb": "127", "bsz": "8", "num_updates": "40200", "lr": "0.00020399", "gnorm": "5.382", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "112411"} [2024-07-07 08:03:53,821][train_inner][INFO] - {"epoch": 1, "update": 0.268, "loss": "1.299", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "96.815", "wer_total": "127.335", "n_error": "30.475", "ppl": "2.46", "accuracy": "76.032", "wer": "23.933", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "40400", "lr": "0.00020498", "gnorm": "5.303", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "112778"} [2024-07-07 08:10:00,683][train_inner][INFO] - {"epoch": 1, "update": 0.269, "loss": "1.278", "ntokens": "126.315", "acc_total": "126.315", "n_correct": "97.18", "wer_total": "126.315", "n_error": "29.11", "ppl": "2.43", "accuracy": "76.935", "wer": "23.046", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "40600", "lr": "0.00020597", "gnorm": "5.359", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "113145"} [2024-07-07 08:16:07,609][train_inner][INFO] - {"epoch": 1, "update": 0.271, "loss": "1.304", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "98.85", "wer_total": "127.345", "n_error": "28.47", "ppl": "2.47", "accuracy": "77.624", "wer": "22.357", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "40800", "lr": "0.00020696", "gnorm": "5.293", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "113512"} [2024-07-07 08:22:14,795][train_inner][INFO] - {"epoch": 1, "update": 0.272, "loss": "1.264", "ntokens": "127.54", "acc_total": "127.54", "n_correct": "99.745", "wer_total": "127.54", "n_error": "27.765", "ppl": "2.4", "accuracy": "78.207", "wer": "21.77", "wps": "69.5", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "41000", "lr": "0.00020795", "gnorm": "5.187", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "113879"} [2024-07-07 08:28:23,150][train_inner][INFO] - {"epoch": 1, "update": 0.273, "loss": "1.273", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "99.98", "wer_total": "127.015", "n_error": "27.02", "ppl": "2.42", "accuracy": "78.715", "wer": "21.273", "wps": "69", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "41200", "lr": "0.00020894", "gnorm": "4.993", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "114247"} [2024-07-07 08:34:29,918][train_inner][INFO] - {"epoch": 1, "update": 0.275, "loss": "1.347", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "96.15", "wer_total": "126.68", "n_error": "30.5", "ppl": "2.54", "accuracy": "75.9", "wer": "24.076", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "41400", "lr": "0.00020993", "gnorm": "5.17", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "114614"} [2024-07-07 08:40:36,543][train_inner][INFO] - {"epoch": 1, "update": 0.276, "loss": "1.277", "ntokens": "126.105", "acc_total": "126.105", "n_correct": "96.78", "wer_total": "126.105", "n_error": "29.29", "ppl": "2.42", "accuracy": "76.746", "wer": "23.227", "wps": "68.8", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "41600", "lr": "0.00021092", "gnorm": "5.146", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "114981"} [2024-07-07 08:46:43,443][train_inner][INFO] - {"epoch": 1, "update": 0.277, "loss": "1.276", "ntokens": "126.235", "acc_total": "126.235", "n_correct": "96.64", "wer_total": "126.235", "n_error": "29.545", "ppl": "2.42", "accuracy": "76.556", "wer": "23.405", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "41800", "lr": "0.00021191", "gnorm": "5.315", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "115347"} [2024-07-07 08:52:50,167][train_inner][INFO] - {"epoch": 1, "update": 0.279, "loss": "1.311", "ntokens": "127.02", "acc_total": "127.02", "n_correct": "99.365", "wer_total": "127.02", "n_error": "27.65", "ppl": "2.48", "accuracy": "78.228", "wer": "21.768", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "42000", "lr": "0.0002129", "gnorm": "5.026", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "115714"} [2024-07-07 08:58:57,348][train_inner][INFO] - {"epoch": 1, "update": 0.28, "loss": "1.295", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "100.43", "wer_total": "126.865", "n_error": "26.425", "ppl": "2.45", "accuracy": "79.163", "wer": "20.829", "wps": "69.1", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "42200", "lr": "0.00021389", "gnorm": "5.012", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "116081"} [2024-07-07 09:05:03,974][train_inner][INFO] - {"epoch": 1, "update": 0.281, "loss": "1.296", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "97.93", "wer_total": "126.265", "n_error": "28.305", "ppl": "2.46", "accuracy": "77.559", "wer": "22.417", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "42400", "lr": "0.00021488", "gnorm": "5.069", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "116448"} [2024-07-07 09:08:07,316][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 09:49:58,207][valid][INFO] - {"epoch": 1, "valid_loss": "1.069", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.4171", "valid_wer_total": "18.1585", "valid_n_error": "3.73902", "valid_ppl": "2.1", "valid_accuracy": "79.396", "valid_wer": "20.591", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "42500", "valid_best_accuracy": "81.526"} [2024-07-07 09:49:58,207][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 42500 updates [2024-07-07 09:49:58,208][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_42500.pt [2024-07-07 09:50:01,380][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_42500.pt [2024-07-07 09:50:03,456][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_42500.pt (epoch 1 @ 42500 updates, score 79.396) (writing took 5.248767814016901 seconds) [2024-07-07 09:53:06,730][train_inner][INFO] - {"epoch": 1, "update": 0.282, "loss": "1.282", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "97.625", "wer_total": "127.235", "n_error": "29.6", "ppl": "2.43", "accuracy": "76.728", "wer": "23.264", "wps": "8.8", "ups": "0.07", "wpb": "127.2", "bsz": "8", "num_updates": "42600", "lr": "0.00021587", "gnorm": "5.09", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "119331"} [2024-07-07 09:59:14,088][train_inner][INFO] - {"epoch": 1, "update": 0.284, "loss": "1.299", "ntokens": "127.635", "acc_total": "127.635", "n_correct": "98.675", "wer_total": "127.635", "n_error": "28.945", "ppl": "2.46", "accuracy": "77.31", "wer": "22.678", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "42800", "lr": "0.00021686", "gnorm": "4.977", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "119698"} [2024-07-07 10:05:21,432][train_inner][INFO] - {"epoch": 1, "update": 0.285, "loss": "1.278", "ntokens": "127.48", "acc_total": "127.48", "n_correct": "101.22", "wer_total": "127.48", "n_error": "26.25", "ppl": "2.42", "accuracy": "79.401", "wer": "20.591", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "43000", "lr": "0.00021785", "gnorm": "4.881", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "120065"} [2024-07-07 10:11:28,167][train_inner][INFO] - {"epoch": 1, "update": 0.286, "loss": "1.283", "ntokens": "126.455", "acc_total": "126.455", "n_correct": "100.65", "wer_total": "126.455", "n_error": "25.79", "ppl": "2.43", "accuracy": "79.594", "wer": "20.395", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "43200", "lr": "0.00021884", "gnorm": "4.889", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "120432"} [2024-07-07 10:17:35,205][train_inner][INFO] - {"epoch": 1, "update": 0.288, "loss": "1.274", "ntokens": "127.21", "acc_total": "127.21", "n_correct": "100.675", "wer_total": "127.21", "n_error": "26.525", "ppl": "2.42", "accuracy": "79.141", "wer": "20.851", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "43400", "lr": "0.00021983", "gnorm": "5.067", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "120799"} [2024-07-07 10:23:42,278][train_inner][INFO] - {"epoch": 1, "update": 0.289, "loss": "1.266", "ntokens": "128.27", "acc_total": "128.27", "n_correct": "101.495", "wer_total": "128.27", "n_error": "26.74", "ppl": "2.41", "accuracy": "79.126", "wer": "20.847", "wps": "69.9", "ups": "0.54", "wpb": "128.3", "bsz": "8", "num_updates": "43600", "lr": "0.00022082", "gnorm": "4.839", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "121166"} [2024-07-07 10:29:49,239][train_inner][INFO] - {"epoch": 1, "update": 0.29, "loss": "1.289", "ntokens": "125.89", "acc_total": "125.89", "n_correct": "97.905", "wer_total": "125.89", "n_error": "27.97", "ppl": "2.44", "accuracy": "77.77", "wer": "22.218", "wps": "68.6", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "43800", "lr": "0.00022181", "gnorm": "5.002", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "121533"} [2024-07-07 10:35:56,084][train_inner][INFO] - {"epoch": 1, "update": 0.292, "loss": "1.26", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "99.175", "wer_total": "127.2", "n_error": "28.015", "ppl": "2.39", "accuracy": "77.968", "wer": "22.024", "wps": "69.3", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "44000", "lr": "0.0002228", "gnorm": "4.74", "loss_scale": "4096", "train_wall": "366", "gb_free": "6.5", "wall": "121900"} [2024-07-07 10:36:36,438][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 10:42:05,333][train_inner][INFO] - {"epoch": 1, "update": 0.293, "loss": "1.282", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "100.15", "wer_total": "126.665", "n_error": "26.49", "ppl": "2.43", "accuracy": "79.067", "wer": "20.913", "wps": "68.6", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "44200", "lr": "0.00022379", "gnorm": "4.869", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "122269"} [2024-07-07 10:48:12,323][train_inner][INFO] - {"epoch": 1, "update": 0.294, "loss": "1.314", "ntokens": "126.465", "acc_total": "126.465", "n_correct": "99.545", "wer_total": "126.465", "n_error": "26.91", "ppl": "2.49", "accuracy": "78.713", "wer": "21.279", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "44400", "lr": "0.00022478", "gnorm": "4.845", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "122636"} [2024-07-07 10:54:19,332][train_inner][INFO] - {"epoch": 1, "update": 0.296, "loss": "1.243", "ntokens": "126.48", "acc_total": "126.48", "n_correct": "99.715", "wer_total": "126.48", "n_error": "26.745", "ppl": "2.37", "accuracy": "78.839", "wer": "21.146", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "44600", "lr": "0.00022577", "gnorm": "4.861", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "123003"} [2024-07-07 11:00:26,122][train_inner][INFO] - {"epoch": 1, "update": 0.297, "loss": "1.26", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "98.405", "wer_total": "127.055", "n_error": "28.625", "ppl": "2.4", "accuracy": "77.451", "wer": "22.53", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "44800", "lr": "0.00022676", "gnorm": "4.634", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "123370"} [2024-07-07 11:06:33,372][train_inner][INFO] - {"epoch": 1, "update": 0.298, "loss": "1.286", "ntokens": "125.79", "acc_total": "125.79", "n_correct": "99.515", "wer_total": "125.79", "n_error": "26.27", "ppl": "2.44", "accuracy": "79.112", "wer": "20.884", "wps": "68.5", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "45000", "lr": "0.00022775", "gnorm": "4.857", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "123737"} [2024-07-07 11:06:33,372][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 11:48:26,606][valid][INFO] - {"epoch": 1, "valid_loss": "1.095", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.8563", "valid_wer_total": "18.1585", "valid_n_error": "3.30088", "valid_ppl": "2.14", "valid_accuracy": "81.815", "valid_wer": "18.178", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "45000", "valid_best_accuracy": "81.815"} [2024-07-07 11:48:26,606][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 45000 updates [2024-07-07 11:48:26,607][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_45000.pt [2024-07-07 11:48:29,757][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_45000.pt [2024-07-07 11:48:33,919][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_45000.pt (epoch 1 @ 45000 updates, score 81.815) (writing took 7.312654301989824 seconds) [2024-07-07 11:54:41,010][train_inner][INFO] - {"epoch": 1, "update": 0.3, "loss": "1.272", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "101.99", "wer_total": "126.815", "n_error": "24.8", "ppl": "2.41", "accuracy": "80.424", "wer": "19.556", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "45200", "lr": "0.00022874", "gnorm": "4.613", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "126625"} [2024-07-07 12:00:48,172][train_inner][INFO] - {"epoch": 1, "update": 0.301, "loss": "1.23", "ntokens": "127.05", "acc_total": "127.05", "n_correct": "102.83", "wer_total": "127.05", "n_error": "24.195", "ppl": "2.35", "accuracy": "80.937", "wer": "19.044", "wps": "69.2", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "45400", "lr": "0.00022973", "gnorm": "4.613", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "126992"} [2024-07-07 12:06:55,256][train_inner][INFO] - {"epoch": 1, "update": 0.302, "loss": "1.273", "ntokens": "127.61", "acc_total": "127.61", "n_correct": "101.565", "wer_total": "127.61", "n_error": "26.025", "ppl": "2.42", "accuracy": "79.59", "wer": "20.394", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "45600", "lr": "0.00023072", "gnorm": "4.56", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "127359"} [2024-07-07 12:13:02,601][train_inner][INFO] - {"epoch": 1, "update": 0.304, "loss": "1.307", "ntokens": "127.25", "acc_total": "127.25", "n_correct": "101.81", "wer_total": "127.25", "n_error": "25.43", "ppl": "2.47", "accuracy": "80.008", "wer": "19.984", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "45800", "lr": "0.00023171", "gnorm": "4.793", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "127727"} [2024-07-07 12:19:10,290][train_inner][INFO] - {"epoch": 1, "update": 0.305, "loss": "1.298", "ntokens": "127.495", "acc_total": "127.495", "n_correct": "100.825", "wer_total": "127.495", "n_error": "26.66", "ppl": "2.46", "accuracy": "79.082", "wer": "20.911", "wps": "69.3", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "46000", "lr": "0.0002327", "gnorm": "4.538", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "128094"} [2024-07-07 12:25:17,561][train_inner][INFO] - {"epoch": 1, "update": 0.306, "loss": "1.269", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "102.28", "wer_total": "127.09", "n_error": "24.795", "ppl": "2.41", "accuracy": "80.478", "wer": "19.51", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "46200", "lr": "0.00023369", "gnorm": "4.579", "loss_scale": "4096", "train_wall": "367", "gb_free": "6.5", "wall": "128462"} [2024-07-07 12:31:24,809][train_inner][INFO] - {"epoch": 1, "update": 0.308, "loss": "1.271", "ntokens": "126.67", "acc_total": "126.67", "n_correct": "101.86", "wer_total": "126.67", "n_error": "24.79", "ppl": "2.41", "accuracy": "80.414", "wer": "19.571", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "46400", "lr": "0.00023468", "gnorm": "4.601", "loss_scale": "4096", "train_wall": "366", "gb_free": "6.5", "wall": "128829"} [2024-07-07 12:37:06,497][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 12:37:33,986][train_inner][INFO] - {"epoch": 1, "update": 0.309, "loss": "1.306", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "101.29", "wer_total": "127.375", "n_error": "26.08", "ppl": "2.47", "accuracy": "79.521", "wer": "20.475", "wps": "69", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "46600", "lr": "0.00023567", "gnorm": "4.666", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "129198"} [2024-07-07 12:43:41,188][train_inner][INFO] - {"epoch": 1, "update": 0.31, "loss": "1.313", "ntokens": "126.485", "acc_total": "126.485", "n_correct": "99.845", "wer_total": "126.485", "n_error": "26.62", "ppl": "2.48", "accuracy": "78.938", "wer": "21.046", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "46800", "lr": "0.00023666", "gnorm": "4.657", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "129565"} [2024-07-07 12:49:48,324][train_inner][INFO] - {"epoch": 1, "update": 0.312, "loss": "1.267", "ntokens": "127.39", "acc_total": "127.39", "n_correct": "101.825", "wer_total": "127.39", "n_error": "25.545", "ppl": "2.41", "accuracy": "79.932", "wer": "20.053", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "47000", "lr": "0.00023765", "gnorm": "4.519", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "129932"} [2024-07-07 12:55:55,521][train_inner][INFO] - {"epoch": 1, "update": 0.313, "loss": "1.298", "ntokens": "126.985", "acc_total": "126.985", "n_correct": "100.28", "wer_total": "126.985", "n_error": "26.7", "ppl": "2.46", "accuracy": "78.97", "wer": "21.026", "wps": "69.2", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "47200", "lr": "0.00023864", "gnorm": "4.619", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "130300"} [2024-07-07 13:02:02,484][train_inner][INFO] - {"epoch": 1, "update": 0.314, "loss": "1.269", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "100.825", "wer_total": "126.675", "n_error": "25.84", "ppl": "2.41", "accuracy": "79.593", "wer": "20.399", "wps": "69", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "47400", "lr": "0.00023963", "gnorm": "4.562", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "130667"} [2024-07-07 13:05:05,968][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 13:47:05,375][valid][INFO] - {"epoch": 1, "valid_loss": "1.074", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.9672", "valid_wer_total": "18.1585", "valid_n_error": "3.18895", "valid_ppl": "2.11", "valid_accuracy": "82.426", "valid_wer": "17.562", "valid_wps": "172.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "47500", "valid_best_accuracy": "82.426"} [2024-07-07 13:47:05,375][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 47500 updates [2024-07-07 13:47:05,376][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_47500.pt [2024-07-07 13:47:08,562][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_47500.pt [2024-07-07 13:47:12,761][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_47500.pt (epoch 1 @ 47500 updates, score 82.426) (writing took 7.385530806030147 seconds) [2024-07-07 13:50:16,149][train_inner][INFO] - {"epoch": 1, "update": 0.316, "loss": "1.261", "ntokens": "127.315", "acc_total": "127.315", "n_correct": "100.76", "wer_total": "127.315", "n_error": "26.53", "ppl": "2.4", "accuracy": "79.142", "wer": "20.838", "wps": "8.8", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "47600", "lr": "0.00024062", "gnorm": "4.602", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "133560"} [2024-07-07 13:56:23,798][train_inner][INFO] - {"epoch": 1, "update": 0.317, "loss": "1.216", "ntokens": "126.745", "acc_total": "126.745", "n_correct": "100.43", "wer_total": "126.745", "n_error": "26.285", "ppl": "2.32", "accuracy": "79.238", "wer": "20.738", "wps": "68.9", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "47800", "lr": "0.00024161", "gnorm": "4.444", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "133928"} [2024-07-07 14:02:31,330][train_inner][INFO] - {"epoch": 1, "update": 0.318, "loss": "1.316", "ntokens": "126.005", "acc_total": "126.005", "n_correct": "97.04", "wer_total": "126.005", "n_error": "28.945", "ppl": "2.49", "accuracy": "77.013", "wer": "22.971", "wps": "68.6", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "48000", "lr": "0.0002426", "gnorm": "4.646", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "134295"} [2024-07-07 14:07:18,110][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-07 14:08:40,789][train_inner][INFO] - {"epoch": 1, "update": 0.32, "loss": "1.263", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "102.405", "wer_total": "127.385", "n_error": "24.97", "ppl": "2.4", "accuracy": "80.39", "wer": "19.602", "wps": "69", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "48200", "lr": "0.00024359", "gnorm": "4.424", "loss_scale": "1024", "train_wall": "369", "gb_free": "6.5", "wall": "134665"} [2024-07-07 14:14:48,585][train_inner][INFO] - {"epoch": 1, "update": 0.321, "loss": "1.238", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "102.125", "wer_total": "126.665", "n_error": "24.515", "ppl": "2.36", "accuracy": "80.626", "wer": "19.354", "wps": "68.9", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "48400", "lr": "0.00024458", "gnorm": "4.461", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "135033"} [2024-07-07 14:20:56,269][train_inner][INFO] - {"epoch": 1, "update": 0.322, "loss": "1.279", "ntokens": "127.47", "acc_total": "127.47", "n_correct": "102.455", "wer_total": "127.47", "n_error": "25", "ppl": "2.43", "accuracy": "80.376", "wer": "19.612", "wps": "69.3", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "48600", "lr": "0.00024557", "gnorm": "4.334", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "135400"} [2024-07-07 14:27:04,357][train_inner][INFO] - {"epoch": 1, "update": 0.324, "loss": "1.237", "ntokens": "125.945", "acc_total": "125.945", "n_correct": "101.56", "wer_total": "125.945", "n_error": "24.37", "ppl": "2.36", "accuracy": "80.638", "wer": "19.35", "wps": "68.4", "ups": "0.54", "wpb": "125.9", "bsz": "8", "num_updates": "48800", "lr": "0.00024656", "gnorm": "4.406", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "135768"} [2024-07-07 14:33:11,971][train_inner][INFO] - {"epoch": 1, "update": 0.325, "loss": "1.287", "ntokens": "126.33", "acc_total": "126.33", "n_correct": "99.375", "wer_total": "126.33", "n_error": "26.95", "ppl": "2.44", "accuracy": "78.663", "wer": "21.333", "wps": "68.7", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "49000", "lr": "0.00024755", "gnorm": "4.407", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "136136"} [2024-07-07 14:39:19,271][train_inner][INFO] - {"epoch": 1, "update": 0.326, "loss": "1.286", "ntokens": "127.755", "acc_total": "127.755", "n_correct": "101.09", "wer_total": "127.755", "n_error": "26.65", "ppl": "2.44", "accuracy": "79.128", "wer": "20.86", "wps": "69.6", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "49200", "lr": "0.00024854", "gnorm": "4.383", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "136503"} [2024-07-07 14:45:27,100][train_inner][INFO] - {"epoch": 1, "update": 0.328, "loss": "1.259", "ntokens": "126.96", "acc_total": "126.96", "n_correct": "104.085", "wer_total": "126.96", "n_error": "22.85", "ppl": "2.39", "accuracy": "81.983", "wer": "17.998", "wps": "69", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "49400", "lr": "0.00024953", "gnorm": "4.356", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "136871"} [2024-07-07 14:51:34,473][train_inner][INFO] - {"epoch": 1, "update": 0.329, "loss": "1.302", "ntokens": "125.79", "acc_total": "125.79", "n_correct": "100.075", "wer_total": "125.79", "n_error": "25.715", "ppl": "2.47", "accuracy": "79.557", "wer": "20.443", "wps": "68.5", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "49600", "lr": "0.00025052", "gnorm": "4.351", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "137239"} [2024-07-07 14:57:41,884][train_inner][INFO] - {"epoch": 1, "update": 0.33, "loss": "1.284", "ntokens": "126.48", "acc_total": "126.48", "n_correct": "100.135", "wer_total": "126.48", "n_error": "26.315", "ppl": "2.44", "accuracy": "79.171", "wer": "20.806", "wps": "68.8", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "49800", "lr": "0.00025151", "gnorm": "4.545", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "137606"} [2024-07-07 15:03:49,329][train_inner][INFO] - {"epoch": 1, "update": 0.332, "loss": "1.323", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "102.195", "wer_total": "126.68", "n_error": "24.475", "ppl": "2.5", "accuracy": "80.672", "wer": "19.32", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "50000", "lr": "0.0002525", "gnorm": "4.464", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "137973"} [2024-07-07 15:03:49,329][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 15:45:58,436][valid][INFO] - {"epoch": 1, "valid_loss": "1.095", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.0245", "valid_wer_total": "18.1585", "valid_n_error": "3.13256", "valid_ppl": "2.14", "valid_accuracy": "82.741", "valid_wer": "17.251", "valid_wps": "172.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "50000", "valid_best_accuracy": "82.741"} [2024-07-07 15:45:58,437][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 50000 updates [2024-07-07 15:45:58,437][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_50000.pt [2024-07-07 15:46:01,614][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_50000.pt [2024-07-07 15:46:05,814][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_50000.pt (epoch 1 @ 50000 updates, score 82.741) (writing took 7.377040912979282 seconds) [2024-07-07 15:52:12,988][train_inner][INFO] - {"epoch": 1, "update": 0.333, "loss": "1.264", "ntokens": "125.975", "acc_total": "125.975", "n_correct": "103.08", "wer_total": "125.975", "n_error": "22.885", "ppl": "2.4", "accuracy": "81.826", "wer": "18.166", "wps": "8.7", "ups": "0.07", "wpb": "126", "bsz": "8", "num_updates": "50200", "lr": "0.00025349", "gnorm": "4.348", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "140877"} [2024-07-07 15:58:20,528][train_inner][INFO] - {"epoch": 1, "update": 0.334, "loss": "1.242", "ntokens": "127.325", "acc_total": "127.325", "n_correct": "102.49", "wer_total": "127.325", "n_error": "24.83", "ppl": "2.36", "accuracy": "80.495", "wer": "19.501", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "50400", "lr": "0.00025448", "gnorm": "4.259", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "141245"} [2024-07-07 16:04:28,141][train_inner][INFO] - {"epoch": 1, "update": 0.336, "loss": "1.289", "ntokens": "127.195", "acc_total": "127.195", "n_correct": "102.97", "wer_total": "127.195", "n_error": "24.21", "ppl": "2.44", "accuracy": "80.954", "wer": "19.034", "wps": "69.2", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "50600", "lr": "0.00025547", "gnorm": "4.351", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "141612"} [2024-07-07 16:10:35,672][train_inner][INFO] - {"epoch": 1, "update": 0.337, "loss": "1.255", "ntokens": "128.035", "acc_total": "128.035", "n_correct": "102.57", "wer_total": "128.035", "n_error": "25.435", "ppl": "2.39", "accuracy": "80.111", "wer": "19.866", "wps": "69.7", "ups": "0.54", "wpb": "128", "bsz": "8", "num_updates": "50800", "lr": "0.00025646", "gnorm": "4.317", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "141980"} [2024-07-07 16:16:43,123][train_inner][INFO] - {"epoch": 1, "update": 0.338, "loss": "1.255", "ntokens": "126.84", "acc_total": "126.84", "n_correct": "103.345", "wer_total": "126.84", "n_error": "23.49", "ppl": "2.39", "accuracy": "81.477", "wer": "18.519", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "51000", "lr": "0.00025745", "gnorm": "4.33", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "142347"} [2024-07-07 16:22:50,609][train_inner][INFO] - {"epoch": 1, "update": 0.34, "loss": "1.295", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "103.58", "wer_total": "126.76", "n_error": "23.18", "ppl": "2.45", "accuracy": "81.713", "wer": "18.287", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "51200", "lr": "0.00025844", "gnorm": "4.339", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "142715"} [2024-07-07 16:28:57,983][train_inner][INFO] - {"epoch": 1, "update": 0.341, "loss": "1.265", "ntokens": "128.065", "acc_total": "128.065", "n_correct": "103.93", "wer_total": "128.065", "n_error": "24.105", "ppl": "2.4", "accuracy": "81.154", "wer": "18.822", "wps": "69.7", "ups": "0.54", "wpb": "128.1", "bsz": "8", "num_updates": "51400", "lr": "0.00025943", "gnorm": "4.35", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "143082"} [2024-07-07 16:35:05,406][train_inner][INFO] - {"epoch": 1, "update": 0.342, "loss": "1.284", "ntokens": "127.15", "acc_total": "127.15", "n_correct": "103.275", "wer_total": "127.15", "n_error": "23.87", "ppl": "2.44", "accuracy": "81.223", "wer": "18.773", "wps": "69.2", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "51600", "lr": "0.00026042", "gnorm": "4.453", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "143449"} [2024-07-07 16:41:12,793][train_inner][INFO] - {"epoch": 1, "update": 0.343, "loss": "1.296", "ntokens": "127.84", "acc_total": "127.84", "n_correct": "104.995", "wer_total": "127.84", "n_error": "22.835", "ppl": "2.46", "accuracy": "82.13", "wer": "17.862", "wps": "69.6", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "51800", "lr": "0.00026141", "gnorm": "4.174", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "143817"} [2024-07-07 16:47:19,996][train_inner][INFO] - {"epoch": 1, "update": 0.345, "loss": "1.257", "ntokens": "127.4", "acc_total": "127.4", "n_correct": "104.875", "wer_total": "127.4", "n_error": "22.505", "ppl": "2.39", "accuracy": "82.319", "wer": "17.665", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "52000", "lr": "0.0002624", "gnorm": "4.289", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "144184"} [2024-07-07 16:53:27,604][train_inner][INFO] - {"epoch": 1, "update": 0.346, "loss": "1.291", "ntokens": "127.54", "acc_total": "127.54", "n_correct": "104.03", "wer_total": "127.54", "n_error": "23.5", "ppl": "2.45", "accuracy": "81.567", "wer": "18.426", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "52200", "lr": "0.00026339", "gnorm": "4.13", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "144552"} [2024-07-07 16:59:22,097][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 16:59:36,791][train_inner][INFO] - {"epoch": 1, "update": 0.347, "loss": "1.28", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "104.68", "wer_total": "127.695", "n_error": "23.005", "ppl": "2.43", "accuracy": "81.977", "wer": "18.016", "wps": "69.2", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "52400", "lr": "0.00026438", "gnorm": "4.282", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "144921"} [2024-07-07 17:02:40,311][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 17:44:34,775][valid][INFO] - {"epoch": 1, "valid_loss": "1.095", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2629", "valid_wer_total": "18.1585", "valid_n_error": "2.89458", "valid_ppl": "2.14", "valid_accuracy": "84.054", "valid_wer": "15.941", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "52500", "valid_best_accuracy": "84.054"} [2024-07-07 17:44:34,776][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 52500 updates [2024-07-07 17:44:34,776][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_52500.pt [2024-07-07 17:44:37,953][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_52500.pt [2024-07-07 17:44:42,148][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_52500.pt (epoch 1 @ 52500 updates, score 84.054) (writing took 7.372627937991638 seconds) [2024-07-07 17:47:45,920][train_inner][INFO] - {"epoch": 1, "update": 0.349, "loss": "1.247", "ntokens": "126.91", "acc_total": "126.91", "n_correct": "104.855", "wer_total": "126.91", "n_error": "22.055", "ppl": "2.37", "accuracy": "82.622", "wer": "17.378", "wps": "8.8", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "52600", "lr": "0.00026537", "gnorm": "4.405", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "147810"} [2024-07-07 17:53:52,996][train_inner][INFO] - {"epoch": 1, "update": 0.35, "loss": "1.247", "ntokens": "125.82", "acc_total": "125.82", "n_correct": "103.94", "wer_total": "125.82", "n_error": "21.865", "ppl": "2.37", "accuracy": "82.61", "wer": "17.378", "wps": "68.6", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "52800", "lr": "0.00026636", "gnorm": "4.219", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "148177"} [2024-07-07 18:00:00,664][train_inner][INFO] - {"epoch": 1, "update": 0.351, "loss": "1.24", "ntokens": "126.845", "acc_total": "126.845", "n_correct": "104.37", "wer_total": "126.845", "n_error": "22.475", "ppl": "2.36", "accuracy": "82.282", "wer": "17.718", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "53000", "lr": "0.00026735", "gnorm": "4.177", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "148545"} [2024-07-07 18:06:07,830][train_inner][INFO] - {"epoch": 1, "update": 0.353, "loss": "1.234", "ntokens": "127.46", "acc_total": "127.46", "n_correct": "105.505", "wer_total": "127.46", "n_error": "21.935", "ppl": "2.35", "accuracy": "82.775", "wer": "17.209", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "53200", "lr": "0.00026834", "gnorm": "4.158", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "148912"} [2024-07-07 18:12:15,015][train_inner][INFO] - {"epoch": 1, "update": 0.354, "loss": "1.294", "ntokens": "127.02", "acc_total": "127.02", "n_correct": "104.1", "wer_total": "127.02", "n_error": "22.91", "ppl": "2.45", "accuracy": "81.956", "wer": "18.037", "wps": "69.2", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "53400", "lr": "0.00026933", "gnorm": "4.071", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "149279"} [2024-07-07 18:18:22,183][train_inner][INFO] - {"epoch": 1, "update": 0.355, "loss": "1.256", "ntokens": "126.475", "acc_total": "126.475", "n_correct": "104.06", "wer_total": "126.475", "n_error": "22.395", "ppl": "2.39", "accuracy": "82.277", "wer": "17.707", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "53600", "lr": "0.00027032", "gnorm": "4.137", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "149646"} [2024-07-07 18:24:29,301][train_inner][INFO] - {"epoch": 1, "update": 0.357, "loss": "1.297", "ntokens": "127.01", "acc_total": "127.01", "n_correct": "103.29", "wer_total": "127.01", "n_error": "23.72", "ppl": "2.46", "accuracy": "81.324", "wer": "18.676", "wps": "69.2", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "53800", "lr": "0.00027131", "gnorm": "4.108", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "150013"} [2024-07-07 18:30:36,553][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "1.275", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "105.03", "wer_total": "127.515", "n_error": "22.475", "ppl": "2.42", "accuracy": "82.367", "wer": "17.625", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "54000", "lr": "0.0002723", "gnorm": "4.038", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "150381"} [2024-07-07 18:36:43,872][train_inner][INFO] - {"epoch": 1, "update": 0.359, "loss": "1.245", "ntokens": "127.22", "acc_total": "127.22", "n_correct": "105.025", "wer_total": "127.22", "n_error": "22.18", "ppl": "2.37", "accuracy": "82.554", "wer": "17.434", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "54200", "lr": "0.00027329", "gnorm": "4.24", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "150748"} [2024-07-07 18:42:51,266][train_inner][INFO] - {"epoch": 1, "update": 0.361, "loss": "1.243", "ntokens": "126.315", "acc_total": "126.315", "n_correct": "102.685", "wer_total": "126.315", "n_error": "23.615", "ppl": "2.37", "accuracy": "81.293", "wer": "18.695", "wps": "68.8", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "54400", "lr": "0.00027428", "gnorm": "4.082", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "151115"} [2024-07-07 18:44:24,942][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 18:49:00,380][train_inner][INFO] - {"epoch": 1, "update": 0.362, "loss": "1.27", "ntokens": "127.16", "acc_total": "127.16", "n_correct": "102.855", "wer_total": "127.16", "n_error": "24.29", "ppl": "2.41", "accuracy": "80.886", "wer": "19.102", "wps": "68.9", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "54600", "lr": "0.00027527", "gnorm": "4.13", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "151484"} [2024-07-07 18:55:07,761][train_inner][INFO] - {"epoch": 1, "update": 0.363, "loss": "1.186", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "105.48", "wer_total": "126.785", "n_error": "21.29", "ppl": "2.27", "accuracy": "83.196", "wer": "16.792", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "54800", "lr": "0.00027626", "gnorm": "4.045", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "151852"} [2024-07-07 19:01:15,061][train_inner][INFO] - {"epoch": 1, "update": 0.365, "loss": "1.219", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "104.29", "wer_total": "126.82", "n_error": "22.53", "ppl": "2.33", "accuracy": "82.235", "wer": "17.765", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "55000", "lr": "0.00027725", "gnorm": "4.091", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "152219"} [2024-07-07 19:01:15,062][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 19:43:07,772][valid][INFO] - {"epoch": 1, "valid_loss": "1.071", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3372", "valid_wer_total": "18.1585", "valid_n_error": "2.81988", "valid_ppl": "2.1", "valid_accuracy": "84.463", "valid_wer": "15.529", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "55000", "valid_best_accuracy": "84.463"} [2024-07-07 19:43:07,772][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 55000 updates [2024-07-07 19:43:07,773][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_55000.pt [2024-07-07 19:43:10,940][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_55000.pt [2024-07-07 19:43:15,168][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_55000.pt (epoch 1 @ 55000 updates, score 84.463) (writing took 7.395976039988454 seconds) [2024-07-07 19:49:22,217][train_inner][INFO] - {"epoch": 1, "update": 0.366, "loss": "1.275", "ntokens": "126.34", "acc_total": "126.34", "n_correct": "103.4", "wer_total": "126.34", "n_error": "22.925", "ppl": "2.42", "accuracy": "81.843", "wer": "18.145", "wps": "8.8", "ups": "0.07", "wpb": "126.3", "bsz": "8", "num_updates": "55200", "lr": "0.00027824", "gnorm": "4.205", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "155106"} [2024-07-07 19:55:29,279][train_inner][INFO] - {"epoch": 1, "update": 0.367, "loss": "1.291", "ntokens": "126.64", "acc_total": "126.64", "n_correct": "102.36", "wer_total": "126.64", "n_error": "24.265", "ppl": "2.45", "accuracy": "80.828", "wer": "19.161", "wps": "69", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "55400", "lr": "0.00027923", "gnorm": "3.959", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "155473"} [2024-07-07 20:01:36,439][train_inner][INFO] - {"epoch": 1, "update": 0.369, "loss": "1.26", "ntokens": "125.955", "acc_total": "125.955", "n_correct": "100.125", "wer_total": "125.955", "n_error": "25.81", "ppl": "2.4", "accuracy": "79.493", "wer": "20.491", "wps": "68.6", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "55600", "lr": "0.00028022", "gnorm": "4.009", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "155840"} [2024-07-07 20:07:43,393][train_inner][INFO] - {"epoch": 1, "update": 0.37, "loss": "1.247", "ntokens": "127.675", "acc_total": "127.675", "n_correct": "104.13", "wer_total": "127.675", "n_error": "23.545", "ppl": "2.37", "accuracy": "81.559", "wer": "18.441", "wps": "69.6", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "55800", "lr": "0.00028121", "gnorm": "4.187", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "156207"} [2024-07-07 20:13:50,826][train_inner][INFO] - {"epoch": 1, "update": 0.371, "loss": "1.303", "ntokens": "126.36", "acc_total": "126.36", "n_correct": "101.595", "wer_total": "126.36", "n_error": "24.765", "ppl": "2.47", "accuracy": "80.401", "wer": "19.599", "wps": "68.8", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "56000", "lr": "0.0002822", "gnorm": "4.201", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "156575"} [2024-07-07 20:19:58,052][train_inner][INFO] - {"epoch": 1, "update": 0.373, "loss": "1.249", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "103.54", "wer_total": "127.11", "n_error": "23.57", "ppl": "2.38", "accuracy": "81.457", "wer": "18.543", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "56200", "lr": "0.00028319", "gnorm": "4.099", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "156942"} [2024-07-07 20:26:04,984][train_inner][INFO] - {"epoch": 1, "update": 0.374, "loss": "1.287", "ntokens": "125.84", "acc_total": "125.84", "n_correct": "102.95", "wer_total": "125.84", "n_error": "22.885", "ppl": "2.44", "accuracy": "81.81", "wer": "18.186", "wps": "68.6", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "56400", "lr": "0.00028418", "gnorm": "4.12", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "157309"} [2024-07-07 20:31:46,045][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 20:32:13,682][train_inner][INFO] - {"epoch": 1, "update": 0.375, "loss": "1.294", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "104.015", "wer_total": "127.375", "n_error": "23.345", "ppl": "2.45", "accuracy": "81.66", "wer": "18.328", "wps": "69.1", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "56600", "lr": "0.00028517", "gnorm": "4.128", "loss_scale": "2048", "train_wall": "368", "gb_free": "6.5", "wall": "157678"} [2024-07-07 20:38:20,347][train_inner][INFO] - {"epoch": 1, "update": 0.377, "loss": "1.291", "ntokens": "125.93", "acc_total": "125.93", "n_correct": "103.37", "wer_total": "125.93", "n_error": "22.545", "ppl": "2.45", "accuracy": "82.085", "wer": "17.903", "wps": "68.7", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "56800", "lr": "0.00028616", "gnorm": "4.073", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "158044"} [2024-07-07 20:44:27,275][train_inner][INFO] - {"epoch": 1, "update": 0.378, "loss": "1.244", "ntokens": "126.47", "acc_total": "126.47", "n_correct": "103.89", "wer_total": "126.47", "n_error": "22.565", "ppl": "2.37", "accuracy": "82.146", "wer": "17.842", "wps": "68.9", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "57000", "lr": "0.00028715", "gnorm": "4.035", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "158411"} [2024-07-07 20:50:34,676][train_inner][INFO] - {"epoch": 1, "update": 0.379, "loss": "1.249", "ntokens": "127.395", "acc_total": "127.395", "n_correct": "104.44", "wer_total": "127.395", "n_error": "22.95", "ppl": "2.38", "accuracy": "81.981", "wer": "18.015", "wps": "69.3", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "57200", "lr": "0.00028814", "gnorm": "3.88", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "158779"} [2024-07-07 20:56:41,678][train_inner][INFO] - {"epoch": 1, "update": 0.381, "loss": "1.258", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "105.125", "wer_total": "127.695", "n_error": "22.57", "ppl": "2.39", "accuracy": "82.325", "wer": "17.675", "wps": "69.6", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "57400", "lr": "0.00028913", "gnorm": "3.844", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "159146"} [2024-07-07 20:59:45,330][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 21:42:03,668][valid][INFO] - {"epoch": 1, "valid_loss": "1.039", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4036", "valid_wer_total": "18.1585", "valid_n_error": "2.75356", "valid_ppl": "2.05", "valid_accuracy": "84.829", "valid_wer": "15.164", "valid_wps": "171.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "57500", "valid_best_accuracy": "84.829"} [2024-07-07 21:42:03,668][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 57500 updates [2024-07-07 21:42:03,669][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_57500.pt [2024-07-07 21:42:06,862][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_57500.pt [2024-07-07 21:42:11,200][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_57500.pt (epoch 1 @ 57500 updates, score 84.829) (writing took 7.53154621302383 seconds) [2024-07-07 21:45:14,356][train_inner][INFO] - {"epoch": 1, "update": 0.382, "loss": "1.268", "ntokens": "126.35", "acc_total": "126.35", "n_correct": "102.315", "wer_total": "126.35", "n_error": "24.02", "ppl": "2.41", "accuracy": "80.977", "wer": "19.011", "wps": "8.7", "ups": "0.07", "wpb": "126.3", "bsz": "8", "num_updates": "57600", "lr": "0.00029012", "gnorm": "3.929", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "162058"} [2024-07-07 21:51:21,201][train_inner][INFO] - {"epoch": 1, "update": 0.383, "loss": "1.228", "ntokens": "127.365", "acc_total": "127.365", "n_correct": "102.1", "wer_total": "127.365", "n_error": "25.23", "ppl": "2.34", "accuracy": "80.163", "wer": "19.809", "wps": "69.4", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "57800", "lr": "0.00029111", "gnorm": "3.913", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "162425"} [2024-07-07 21:57:27,700][train_inner][INFO] - {"epoch": 1, "update": 0.385, "loss": "1.3", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "102.95", "wer_total": "126.555", "n_error": "23.575", "ppl": "2.46", "accuracy": "81.348", "wer": "18.628", "wps": "69.1", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "58000", "lr": "0.0002921", "gnorm": "4.124", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "162792"} [2024-07-07 22:03:34,815][train_inner][INFO] - {"epoch": 1, "update": 0.386, "loss": "1.243", "ntokens": "127.645", "acc_total": "127.645", "n_correct": "105.605", "wer_total": "127.645", "n_error": "22.03", "ppl": "2.37", "accuracy": "82.733", "wer": "17.259", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "58200", "lr": "0.00029309", "gnorm": "3.856", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "163159"} [2024-07-07 22:09:41,231][train_inner][INFO] - {"epoch": 1, "update": 0.387, "loss": "1.288", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "103.085", "wer_total": "126.295", "n_error": "23.19", "ppl": "2.44", "accuracy": "81.622", "wer": "18.362", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "58400", "lr": "0.00029408", "gnorm": "4.068", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "163525"} [2024-07-07 22:15:48,013][train_inner][INFO] - {"epoch": 1, "update": 0.389, "loss": "1.262", "ntokens": "127.04", "acc_total": "127.04", "n_correct": "104.56", "wer_total": "127.04", "n_error": "22.47", "ppl": "2.4", "accuracy": "82.305", "wer": "17.687", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "58600", "lr": "0.00029507", "gnorm": "3.749", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "163892"} [2024-07-07 22:17:34,274][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-07 22:21:56,192][train_inner][INFO] - {"epoch": 1, "update": 0.39, "loss": "1.276", "ntokens": "126.695", "acc_total": "126.695", "n_correct": "104.315", "wer_total": "126.695", "n_error": "22.36", "ppl": "2.42", "accuracy": "82.336", "wer": "17.649", "wps": "68.8", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "58800", "lr": "0.00029606", "gnorm": "3.973", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "164260"} [2024-07-07 22:28:02,942][train_inner][INFO] - {"epoch": 1, "update": 0.391, "loss": "1.213", "ntokens": "126.825", "acc_total": "126.825", "n_correct": "104.985", "wer_total": "126.825", "n_error": "21.83", "ppl": "2.32", "accuracy": "82.779", "wer": "17.213", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "59000", "lr": "0.00029705", "gnorm": "3.81", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "164627"} [2024-07-07 22:31:41,103][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-07 22:34:11,342][train_inner][INFO] - {"epoch": 1, "update": 0.393, "loss": "1.302", "ntokens": "126.375", "acc_total": "126.375", "n_correct": "103.56", "wer_total": "126.375", "n_error": "22.81", "ppl": "2.47", "accuracy": "81.947", "wer": "18.049", "wps": "68.6", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "59200", "lr": "0.00029804", "gnorm": "4.065", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "164995"} [2024-07-07 22:40:17,565][train_inner][INFO] - {"epoch": 1, "update": 0.394, "loss": "1.261", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "104.8", "wer_total": "127.32", "n_error": "22.52", "ppl": "2.4", "accuracy": "82.312", "wer": "17.688", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "59400", "lr": "0.00029903", "gnorm": "4.073", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "165362"} [2024-07-07 22:46:24,090][train_inner][INFO] - {"epoch": 1, "update": 0.395, "loss": "1.246", "ntokens": "126.215", "acc_total": "126.215", "n_correct": "104.095", "wer_total": "126.215", "n_error": "22.12", "ppl": "2.37", "accuracy": "82.474", "wer": "17.526", "wps": "68.9", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "59600", "lr": "0.00030002", "gnorm": "3.874", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "165728"} [2024-07-07 22:52:30,680][train_inner][INFO] - {"epoch": 1, "update": 0.397, "loss": "1.244", "ntokens": "127.085", "acc_total": "127.085", "n_correct": "105.33", "wer_total": "127.085", "n_error": "21.745", "ppl": "2.37", "accuracy": "82.882", "wer": "17.111", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "59800", "lr": "0.00030101", "gnorm": "3.731", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "166095"} [2024-07-07 22:58:37,439][train_inner][INFO] - {"epoch": 1, "update": 0.398, "loss": "1.239", "ntokens": "126.88", "acc_total": "126.88", "n_correct": "105.025", "wer_total": "126.88", "n_error": "21.825", "ppl": "2.36", "accuracy": "82.775", "wer": "17.201", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "60000", "lr": "0.000302", "gnorm": "3.902", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "166461"} [2024-07-07 22:58:37,440][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-07 23:40:29,560][valid][INFO] - {"epoch": 1, "valid_loss": "1.037", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4273", "valid_wer_total": "18.1585", "valid_n_error": "2.73022", "valid_ppl": "2.05", "valid_accuracy": "84.959", "valid_wer": "15.036", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "60000", "valid_best_accuracy": "84.959"} [2024-07-07 23:40:29,560][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 60000 updates [2024-07-07 23:40:29,561][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_60000.pt [2024-07-07 23:40:32,715][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_60000.pt [2024-07-07 23:40:36,967][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_60000.pt (epoch 1 @ 60000 updates, score 84.959) (writing took 7.406840390001889 seconds) [2024-07-07 23:46:44,065][train_inner][INFO] - {"epoch": 1, "update": 0.399, "loss": "1.311", "ntokens": "126.43", "acc_total": "126.43", "n_correct": "103.585", "wer_total": "126.43", "n_error": "22.84", "ppl": "2.48", "accuracy": "81.931", "wer": "18.065", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "60200", "lr": "0.00030299", "gnorm": "4.016", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "169348"} [2024-07-07 23:52:50,920][train_inner][INFO] - {"epoch": 1, "update": 0.401, "loss": "1.298", "ntokens": "126.715", "acc_total": "126.715", "n_correct": "103.825", "wer_total": "126.715", "n_error": "22.885", "ppl": "2.46", "accuracy": "81.936", "wer": "18.06", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "60400", "lr": "0.00030398", "gnorm": "3.82", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "169715"} [2024-07-07 23:58:57,643][train_inner][INFO] - {"epoch": 1, "update": 0.402, "loss": "1.289", "ntokens": "127.66", "acc_total": "127.66", "n_correct": "104.97", "wer_total": "127.66", "n_error": "22.68", "ppl": "2.44", "accuracy": "82.226", "wer": "17.766", "wps": "69.6", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "60600", "lr": "0.00030497", "gnorm": "3.82", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "170082"} [2024-07-08 00:05:04,425][train_inner][INFO] - {"epoch": 1, "update": 0.403, "loss": "1.229", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "104.145", "wer_total": "126.78", "n_error": "22.62", "ppl": "2.34", "accuracy": "82.146", "wer": "17.842", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "60800", "lr": "0.00030596", "gnorm": "3.98", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "170448"} [2024-07-08 00:11:11,069][train_inner][INFO] - {"epoch": 1, "update": 0.405, "loss": "1.254", "ntokens": "126.44", "acc_total": "126.44", "n_correct": "104.295", "wer_total": "126.44", "n_error": "22.14", "ppl": "2.38", "accuracy": "82.486", "wer": "17.51", "wps": "69", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "61000", "lr": "0.00030695", "gnorm": "3.967", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "170815"} [2024-07-08 00:12:24,357][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-08 00:17:19,659][train_inner][INFO] - {"epoch": 1, "update": 0.406, "loss": "1.292", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "103.825", "wer_total": "126.905", "n_error": "23.07", "ppl": "2.45", "accuracy": "81.813", "wer": "18.179", "wps": "68.9", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "61200", "lr": "0.00030794", "gnorm": "3.843", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "171184"} [2024-07-08 00:23:26,552][train_inner][INFO] - {"epoch": 1, "update": 0.407, "loss": "1.264", "ntokens": "126.275", "acc_total": "126.275", "n_correct": "104.15", "wer_total": "126.275", "n_error": "22.105", "ppl": "2.4", "accuracy": "82.479", "wer": "17.505", "wps": "68.8", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "61400", "lr": "0.00030893", "gnorm": "3.768", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "171551"} [2024-07-08 00:29:33,485][train_inner][INFO] - {"epoch": 1, "update": 0.409, "loss": "1.245", "ntokens": "125.835", "acc_total": "125.835", "n_correct": "103.995", "wer_total": "125.835", "n_error": "21.84", "ppl": "2.37", "accuracy": "82.644", "wer": "17.356", "wps": "68.6", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "61600", "lr": "0.00030992", "gnorm": "3.671", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "171918"} [2024-07-08 00:35:40,486][train_inner][INFO] - {"epoch": 1, "update": 0.41, "loss": "1.267", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "104.11", "wer_total": "127.055", "n_error": "22.945", "ppl": "2.41", "accuracy": "81.941", "wer": "18.059", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "61800", "lr": "0.00031091", "gnorm": "3.846", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "172285"} [2024-07-08 00:41:47,359][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "1.257", "ntokens": "126.99", "acc_total": "126.99", "n_correct": "104.82", "wer_total": "126.99", "n_error": "22.135", "ppl": "2.39", "accuracy": "82.542", "wer": "17.431", "wps": "69.2", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "62000", "lr": "0.0003119", "gnorm": "3.678", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "172651"} [2024-07-08 00:47:54,437][train_inner][INFO] - {"epoch": 1, "update": 0.412, "loss": "1.307", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "104.14", "wer_total": "127.06", "n_error": "22.915", "ppl": "2.47", "accuracy": "81.961", "wer": "18.035", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "62200", "lr": "0.00031289", "gnorm": "3.751", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "173018"} [2024-07-08 00:54:01,109][train_inner][INFO] - {"epoch": 1, "update": 0.414, "loss": "1.257", "ntokens": "126.435", "acc_total": "126.435", "n_correct": "104.12", "wer_total": "126.435", "n_error": "22.315", "ppl": "2.39", "accuracy": "82.351", "wer": "17.649", "wps": "69", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "62400", "lr": "0.00031388", "gnorm": "3.884", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "173385"} [2024-07-08 00:57:04,462][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 01:38:55,934][valid][INFO] - {"epoch": 1, "valid_loss": "1.039", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4103", "valid_wer_total": "18.1585", "valid_n_error": "2.74681", "valid_ppl": "2.05", "valid_accuracy": "84.866", "valid_wer": "15.127", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "62500", "valid_best_accuracy": "84.959"} [2024-07-08 01:38:55,935][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 62500 updates [2024-07-08 01:38:55,935][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_62500.pt [2024-07-08 01:38:59,147][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_62500.pt [2024-07-08 01:39:01,291][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_62500.pt (epoch 1 @ 62500 updates, score 84.866) (writing took 5.356785287964158 seconds) [2024-07-08 01:42:04,505][train_inner][INFO] - {"epoch": 1, "update": 0.415, "loss": "1.265", "ntokens": "128.095", "acc_total": "128.095", "n_correct": "105.19", "wer_total": "128.095", "n_error": "22.88", "ppl": "2.4", "accuracy": "82.119", "wer": "17.862", "wps": "8.9", "ups": "0.07", "wpb": "128.1", "bsz": "8", "num_updates": "62600", "lr": "0.00031487", "gnorm": "3.677", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "176269"} [2024-07-08 01:48:11,380][train_inner][INFO] - {"epoch": 1, "update": 0.416, "loss": "1.231", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "104.935", "wer_total": "126.775", "n_error": "21.84", "ppl": "2.35", "accuracy": "82.773", "wer": "17.227", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "62800", "lr": "0.00031586", "gnorm": "3.849", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "176635"} [2024-07-08 01:54:18,292][train_inner][INFO] - {"epoch": 1, "update": 0.418, "loss": "1.245", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "104.46", "wer_total": "126.815", "n_error": "22.345", "ppl": "2.37", "accuracy": "82.372", "wer": "17.62", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "63000", "lr": "0.00031685", "gnorm": "3.755", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "177002"} [2024-07-08 02:00:26,124][train_inner][INFO] - {"epoch": 1, "update": 0.419, "loss": "1.309", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "104.04", "wer_total": "127.145", "n_error": "23.1", "ppl": "2.48", "accuracy": "81.828", "wer": "18.168", "wps": "69.1", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "63200", "lr": "0.00031784", "gnorm": "3.82", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "177370"} [2024-07-08 02:06:32,945][train_inner][INFO] - {"epoch": 1, "update": 0.42, "loss": "1.29", "ntokens": "127.25", "acc_total": "127.25", "n_correct": "104.61", "wer_total": "127.25", "n_error": "22.61", "ppl": "2.44", "accuracy": "82.208", "wer": "17.768", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "63400", "lr": "0.00031883", "gnorm": "3.559", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "177737"} [2024-07-08 02:12:39,835][train_inner][INFO] - {"epoch": 1, "update": 0.422, "loss": "1.279", "ntokens": "127.455", "acc_total": "127.455", "n_correct": "104.65", "wer_total": "127.455", "n_error": "22.79", "ppl": "2.43", "accuracy": "82.107", "wer": "17.881", "wps": "69.5", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "63600", "lr": "0.00031982", "gnorm": "3.688", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "178104"} [2024-07-08 02:18:47,089][train_inner][INFO] - {"epoch": 1, "update": 0.423, "loss": "1.295", "ntokens": "126.84", "acc_total": "126.84", "n_correct": "104.25", "wer_total": "126.84", "n_error": "22.56", "ppl": "2.45", "accuracy": "82.19", "wer": "17.786", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "63800", "lr": "0.00032081", "gnorm": "3.803", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "178471"} [2024-07-08 02:24:54,185][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "1.267", "ntokens": "126.94", "acc_total": "126.94", "n_correct": "104.08", "wer_total": "126.94", "n_error": "22.86", "ppl": "2.41", "accuracy": "81.991", "wer": "18.009", "wps": "69.2", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "64000", "lr": "0.0003218", "gnorm": "3.789", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "178838"} [2024-07-08 02:31:01,508][train_inner][INFO] - {"epoch": 1, "update": 0.426, "loss": "1.235", "ntokens": "127.715", "acc_total": "127.715", "n_correct": "105.04", "wer_total": "127.715", "n_error": "22.675", "ppl": "2.35", "accuracy": "82.246", "wer": "17.754", "wps": "69.5", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "64200", "lr": "0.00032279", "gnorm": "3.768", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "179206"} [2024-07-08 02:37:08,232][train_inner][INFO] - {"epoch": 1, "update": 0.427, "loss": "1.306", "ntokens": "125.845", "acc_total": "125.845", "n_correct": "103.185", "wer_total": "125.845", "n_error": "22.655", "ppl": "2.47", "accuracy": "81.994", "wer": "18.002", "wps": "68.6", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "64400", "lr": "0.00032378", "gnorm": "3.722", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "179572"} [2024-07-08 02:43:15,363][train_inner][INFO] - {"epoch": 1, "update": 0.428, "loss": "1.252", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "104.67", "wer_total": "126.68", "n_error": "22", "ppl": "2.38", "accuracy": "82.626", "wer": "17.367", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "64600", "lr": "0.00032477", "gnorm": "3.796", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "179939"} [2024-07-08 02:49:22,939][train_inner][INFO] - {"epoch": 1, "update": 0.43, "loss": "1.283", "ntokens": "127.31", "acc_total": "127.31", "n_correct": "104.635", "wer_total": "127.31", "n_error": "22.665", "ppl": "2.43", "accuracy": "82.189", "wer": "17.803", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "64800", "lr": "0.00032576", "gnorm": "3.911", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "180307"} [2024-07-08 02:55:30,034][train_inner][INFO] - {"epoch": 1, "update": 0.431, "loss": "1.247", "ntokens": "126.94", "acc_total": "126.94", "n_correct": "104.42", "wer_total": "126.94", "n_error": "22.505", "ppl": "2.37", "accuracy": "82.259", "wer": "17.729", "wps": "69.2", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "65000", "lr": "0.00032675", "gnorm": "3.646", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "180674"} [2024-07-08 02:55:30,035][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 03:37:23,729][valid][INFO] - {"epoch": 1, "valid_loss": "1.057", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3302", "valid_wer_total": "18.1585", "valid_n_error": "2.82718", "valid_ppl": "2.08", "valid_accuracy": "84.424", "valid_wer": "15.569", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "65000", "valid_best_accuracy": "84.959"} [2024-07-08 03:37:23,729][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 65000 updates [2024-07-08 03:37:23,730][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_65000.pt [2024-07-08 03:37:26,906][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_65000.pt [2024-07-08 03:37:28,981][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_65000.pt (epoch 1 @ 65000 updates, score 84.424) (writing took 5.25149389798753 seconds) [2024-07-08 03:43:35,754][train_inner][INFO] - {"epoch": 1, "update": 0.432, "loss": "1.298", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "104.205", "wer_total": "127.1", "n_error": "22.88", "ppl": "2.46", "accuracy": "81.987", "wer": "18.002", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "65200", "lr": "0.00032774", "gnorm": "3.737", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "183560"} [2024-07-08 03:49:42,495][train_inner][INFO] - {"epoch": 1, "update": 0.434, "loss": "1.298", "ntokens": "125.965", "acc_total": "125.965", "n_correct": "103.34", "wer_total": "125.965", "n_error": "22.615", "ppl": "2.46", "accuracy": "82.039", "wer": "17.953", "wps": "68.7", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "65400", "lr": "0.00032873", "gnorm": "3.847", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "183927"} [2024-07-08 03:55:49,454][train_inner][INFO] - {"epoch": 1, "update": 0.435, "loss": "1.249", "ntokens": "128.76", "acc_total": "128.76", "n_correct": "106.285", "wer_total": "128.76", "n_error": "22.465", "ppl": "2.38", "accuracy": "82.545", "wer": "17.447", "wps": "70.2", "ups": "0.55", "wpb": "128.8", "bsz": "8", "num_updates": "65600", "lr": "0.00032972", "gnorm": "3.748", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "184294"} [2024-07-08 04:01:56,561][train_inner][INFO] - {"epoch": 1, "update": 0.436, "loss": "1.23", "ntokens": "126.39", "acc_total": "126.39", "n_correct": "104.2", "wer_total": "126.39", "n_error": "22.18", "ppl": "2.35", "accuracy": "82.443", "wer": "17.549", "wps": "68.9", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "65800", "lr": "0.00033071", "gnorm": "3.791", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "184661"} [2024-07-08 04:08:03,302][train_inner][INFO] - {"epoch": 1, "update": 0.438, "loss": "1.295", "ntokens": "128.18", "acc_total": "128.18", "n_correct": "105.325", "wer_total": "128.18", "n_error": "22.85", "ppl": "2.45", "accuracy": "82.17", "wer": "17.826", "wps": "69.9", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "66000", "lr": "0.0003317", "gnorm": "3.627", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "185027"} [2024-07-08 04:14:09,868][train_inner][INFO] - {"epoch": 1, "update": 0.439, "loss": "1.268", "ntokens": "127.705", "acc_total": "127.705", "n_correct": "105.1", "wer_total": "127.705", "n_error": "22.58", "ppl": "2.41", "accuracy": "82.299", "wer": "17.681", "wps": "69.7", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "66200", "lr": "0.00033269", "gnorm": "3.737", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "185394"} [2024-07-08 04:20:16,890][train_inner][INFO] - {"epoch": 1, "update": 0.44, "loss": "1.266", "ntokens": "126.42", "acc_total": "126.42", "n_correct": "104.175", "wer_total": "126.42", "n_error": "22.24", "ppl": "2.41", "accuracy": "82.404", "wer": "17.592", "wps": "68.9", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "66400", "lr": "0.00033368", "gnorm": "3.696", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "185761"} [2024-07-08 04:26:23,601][train_inner][INFO] - {"epoch": 1, "update": 0.442, "loss": "1.291", "ntokens": "125.745", "acc_total": "125.745", "n_correct": "103.035", "wer_total": "125.745", "n_error": "22.69", "ppl": "2.45", "accuracy": "81.94", "wer": "18.044", "wps": "68.6", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "66600", "lr": "0.00033467", "gnorm": "3.744", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "186128"} [2024-07-08 04:32:30,605][train_inner][INFO] - {"epoch": 1, "update": 0.443, "loss": "1.319", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "103.96", "wer_total": "127.345", "n_error": "23.385", "ppl": "2.5", "accuracy": "81.636", "wer": "18.364", "wps": "69.4", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "66800", "lr": "0.00033566", "gnorm": "3.687", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "186495"} [2024-07-08 04:38:37,602][train_inner][INFO] - {"epoch": 1, "update": 0.444, "loss": "1.271", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "103.95", "wer_total": "126.945", "n_error": "22.985", "ppl": "2.41", "accuracy": "81.886", "wer": "18.106", "wps": "69.2", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "67000", "lr": "0.00033665", "gnorm": "3.702", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "186862"} [2024-07-08 04:44:44,579][train_inner][INFO] - {"epoch": 1, "update": 0.446, "loss": "1.251", "ntokens": "126.88", "acc_total": "126.88", "n_correct": "104.63", "wer_total": "126.88", "n_error": "22.22", "ppl": "2.38", "accuracy": "82.464", "wer": "17.513", "wps": "69.1", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "67200", "lr": "0.00033764", "gnorm": "3.61", "loss_scale": "4096", "train_wall": "366", "gb_free": "6.5", "wall": "187229"} [2024-07-08 04:46:19,864][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-08 04:47:35,033][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-08 04:50:55,085][train_inner][INFO] - {"epoch": 1, "update": 0.447, "loss": "1.339", "ntokens": "125.98", "acc_total": "125.98", "n_correct": "102.36", "wer_total": "125.98", "n_error": "23.605", "ppl": "2.53", "accuracy": "81.251", "wer": "18.737", "wps": "68", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "67400", "lr": "0.00033863", "gnorm": "3.89", "loss_scale": "1024", "train_wall": "370", "gb_free": "6.5", "wall": "187599"} [2024-07-08 04:53:58,607][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 05:35:49,614][valid][INFO] - {"epoch": 1, "valid_loss": "1.071", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3386", "valid_wer_total": "18.1585", "valid_n_error": "2.81867", "valid_ppl": "2.1", "valid_accuracy": "84.471", "valid_wer": "15.523", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "67500", "valid_best_accuracy": "84.959"} [2024-07-08 05:35:49,615][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 67500 updates [2024-07-08 05:35:49,615][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_67500.pt [2024-07-08 05:35:52,807][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_67500.pt [2024-07-08 05:35:54,915][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_67500.pt (epoch 1 @ 67500 updates, score 84.471) (writing took 5.300553671026137 seconds) [2024-07-08 05:38:58,082][train_inner][INFO] - {"epoch": 1, "update": 0.448, "loss": "1.305", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "103.965", "wer_total": "126.665", "n_error": "22.69", "ppl": "2.47", "accuracy": "82.079", "wer": "17.913", "wps": "8.8", "ups": "0.07", "wpb": "126.7", "bsz": "8", "num_updates": "67600", "lr": "0.00033962", "gnorm": "3.844", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "190482"} [2024-07-08 05:45:05,048][train_inner][INFO] - {"epoch": 1, "update": 0.45, "loss": "1.285", "ntokens": "126.925", "acc_total": "126.925", "n_correct": "104.47", "wer_total": "126.925", "n_error": "22.435", "ppl": "2.44", "accuracy": "82.308", "wer": "17.676", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "67800", "lr": "0.00034061", "gnorm": "3.64", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "190849"} [2024-07-08 05:51:12,237][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "1.279", "ntokens": "127.995", "acc_total": "127.995", "n_correct": "104.695", "wer_total": "127.995", "n_error": "23.3", "ppl": "2.43", "accuracy": "81.796", "wer": "18.204", "wps": "69.7", "ups": "0.54", "wpb": "128", "bsz": "8", "num_updates": "68000", "lr": "0.0003416", "gnorm": "3.639", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "191216"} [2024-07-08 05:57:18,998][train_inner][INFO] - {"epoch": 1, "update": 0.452, "loss": "1.294", "ntokens": "126.69", "acc_total": "126.69", "n_correct": "103.505", "wer_total": "126.69", "n_error": "23.14", "ppl": "2.45", "accuracy": "81.699", "wer": "18.265", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "68200", "lr": "0.00034259", "gnorm": "3.763", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "191583"} [2024-07-08 06:03:25,842][train_inner][INFO] - {"epoch": 1, "update": 0.454, "loss": "1.277", "ntokens": "126.39", "acc_total": "126.39", "n_correct": "103.64", "wer_total": "126.39", "n_error": "22.73", "ppl": "2.42", "accuracy": "82", "wer": "17.984", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "68400", "lr": "0.00034358", "gnorm": "3.637", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "191950"} [2024-07-08 06:09:32,872][train_inner][INFO] - {"epoch": 1, "update": 0.455, "loss": "1.302", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "103.65", "wer_total": "126.77", "n_error": "23.11", "ppl": "2.47", "accuracy": "81.762", "wer": "18.23", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "68600", "lr": "0.00034457", "gnorm": "3.761", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "192317"} [2024-07-08 06:15:39,777][train_inner][INFO] - {"epoch": 1, "update": 0.456, "loss": "1.27", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "103.82", "wer_total": "126.31", "n_error": "22.49", "ppl": "2.41", "accuracy": "82.195", "wer": "17.805", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "68800", "lr": "0.00034556", "gnorm": "3.615", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "192684"} [2024-07-08 06:21:47,409][train_inner][INFO] - {"epoch": 1, "update": 0.458, "loss": "1.261", "ntokens": "128.245", "acc_total": "128.245", "n_correct": "105.645", "wer_total": "128.245", "n_error": "22.59", "ppl": "2.4", "accuracy": "82.377", "wer": "17.615", "wps": "69.8", "ups": "0.54", "wpb": "128.2", "bsz": "8", "num_updates": "69000", "lr": "0.00034655", "gnorm": "3.719", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "193051"} [2024-07-08 06:27:54,600][train_inner][INFO] - {"epoch": 1, "update": 0.459, "loss": "1.311", "ntokens": "127.93", "acc_total": "127.93", "n_correct": "104.78", "wer_total": "127.93", "n_error": "23.14", "ppl": "2.48", "accuracy": "81.904", "wer": "18.088", "wps": "69.7", "ups": "0.54", "wpb": "127.9", "bsz": "8", "num_updates": "69200", "lr": "0.00034754", "gnorm": "3.764", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "193419"} [2024-07-08 06:34:01,560][train_inner][INFO] - {"epoch": 1, "update": 0.46, "loss": "1.284", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "104.655", "wer_total": "127.595", "n_error": "22.925", "ppl": "2.43", "accuracy": "82.021", "wer": "17.967", "wps": "69.5", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "69400", "lr": "0.00034853", "gnorm": "3.684", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "193786"} [2024-07-08 06:40:08,368][train_inner][INFO] - {"epoch": 1, "update": 0.462, "loss": "1.327", "ntokens": "127.51", "acc_total": "127.51", "n_correct": "104.035", "wer_total": "127.51", "n_error": "23.46", "ppl": "2.51", "accuracy": "81.59", "wer": "18.399", "wps": "69.5", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "69600", "lr": "0.00034952", "gnorm": "3.742", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "194152"} [2024-07-08 06:46:15,185][train_inner][INFO] - {"epoch": 1, "update": 0.463, "loss": "1.309", "ntokens": "127.265", "acc_total": "127.265", "n_correct": "104.05", "wer_total": "127.265", "n_error": "23.205", "ppl": "2.48", "accuracy": "81.759", "wer": "18.234", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "69800", "lr": "0.00035051", "gnorm": "3.702", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "194519"} [2024-07-08 06:52:22,092][train_inner][INFO] - {"epoch": 1, "update": 0.464, "loss": "1.256", "ntokens": "128.02", "acc_total": "128.02", "n_correct": "105.145", "wer_total": "128.02", "n_error": "22.875", "ppl": "2.39", "accuracy": "82.132", "wer": "17.868", "wps": "69.8", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "70000", "lr": "0.0003515", "gnorm": "3.672", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "194886"} [2024-07-08 06:52:22,093][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 07:34:12,704][valid][INFO] - {"epoch": 1, "valid_loss": "1.093", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2584", "valid_wer_total": "18.1585", "valid_n_error": "2.89896", "valid_ppl": "2.13", "valid_accuracy": "84.029", "valid_wer": "15.965", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "70000", "valid_best_accuracy": "84.959"} [2024-07-08 07:34:12,704][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 70000 updates [2024-07-08 07:34:12,705][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_70000.pt [2024-07-08 07:34:15,880][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_70000.pt [2024-07-08 07:34:17,971][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_70000.pt (epoch 1 @ 70000 updates, score 84.029) (writing took 5.2666870770044625 seconds) [2024-07-08 07:40:24,880][train_inner][INFO] - {"epoch": 1, "update": 0.466, "loss": "1.305", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "104.025", "wer_total": "127.615", "n_error": "23.58", "ppl": "2.47", "accuracy": "81.515", "wer": "18.477", "wps": "8.9", "ups": "0.07", "wpb": "127.6", "bsz": "8", "num_updates": "70200", "lr": "0.00035249", "gnorm": "3.623", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "197769"} [2024-07-08 07:44:32,401][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-08 07:46:33,604][train_inner][INFO] - {"epoch": 1, "update": 0.467, "loss": "1.276", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "103.775", "wer_total": "126.725", "n_error": "22.945", "ppl": "2.42", "accuracy": "81.89", "wer": "18.106", "wps": "68.7", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "70400", "lr": "0.00035348", "gnorm": "3.695", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "198138"} [2024-07-08 07:52:40,573][train_inner][INFO] - {"epoch": 1, "update": 0.468, "loss": "1.292", "ntokens": "127.305", "acc_total": "127.305", "n_correct": "104.595", "wer_total": "127.305", "n_error": "22.705", "ppl": "2.45", "accuracy": "82.161", "wer": "17.835", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "70600", "lr": "0.00035447", "gnorm": "3.55", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "198505"} [2024-07-08 07:58:48,079][train_inner][INFO] - {"epoch": 1, "update": 0.47, "loss": "1.258", "ntokens": "127.295", "acc_total": "127.295", "n_correct": "105.14", "wer_total": "127.295", "n_error": "22.155", "ppl": "2.39", "accuracy": "82.596", "wer": "17.404", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "70800", "lr": "0.00035546", "gnorm": "3.47", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "198872"} [2024-07-08 08:04:54,705][train_inner][INFO] - {"epoch": 1, "update": 0.471, "loss": "1.278", "ntokens": "127.01", "acc_total": "127.01", "n_correct": "103.995", "wer_total": "127.01", "n_error": "23.015", "ppl": "2.43", "accuracy": "81.879", "wer": "18.121", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "71000", "lr": "0.00035645", "gnorm": "3.616", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "199239"} [2024-07-08 08:11:01,045][train_inner][INFO] - {"epoch": 1, "update": 0.472, "loss": "1.285", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "103.97", "wer_total": "126.805", "n_error": "22.815", "ppl": "2.44", "accuracy": "81.992", "wer": "17.992", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "71200", "lr": "0.00035744", "gnorm": "3.731", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "199605"} [2024-07-08 08:17:07,735][train_inner][INFO] - {"epoch": 1, "update": 0.473, "loss": "1.28", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "104.02", "wer_total": "127.155", "n_error": "23.115", "ppl": "2.43", "accuracy": "81.806", "wer": "18.179", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "71400", "lr": "0.00035843", "gnorm": "3.795", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "199972"} [2024-07-08 08:23:14,243][train_inner][INFO] - {"epoch": 1, "update": 0.475, "loss": "1.288", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "103.875", "wer_total": "126.865", "n_error": "22.97", "ppl": "2.44", "accuracy": "81.878", "wer": "18.106", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "71600", "lr": "0.00035942", "gnorm": "3.606", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "200338"} [2024-07-08 08:29:21,427][train_inner][INFO] - {"epoch": 1, "update": 0.476, "loss": "1.307", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "104.12", "wer_total": "127.055", "n_error": "22.92", "ppl": "2.47", "accuracy": "81.949", "wer": "18.039", "wps": "69.2", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "71800", "lr": "0.00036041", "gnorm": "3.594", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "200705"} [2024-07-08 08:35:28,249][train_inner][INFO] - {"epoch": 1, "update": 0.477, "loss": "1.273", "ntokens": "126.47", "acc_total": "126.47", "n_correct": "103.56", "wer_total": "126.47", "n_error": "22.91", "ppl": "2.42", "accuracy": "81.885", "wer": "18.115", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "72000", "lr": "0.0003614", "gnorm": "3.756", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "201072"} [2024-07-08 08:41:35,169][train_inner][INFO] - {"epoch": 1, "update": 0.479, "loss": "1.309", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "102.775", "wer_total": "126.01", "n_error": "23.23", "ppl": "2.48", "accuracy": "81.561", "wer": "18.435", "wps": "68.7", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "72200", "lr": "0.00036239", "gnorm": "3.64", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "201439"} [2024-07-08 08:47:41,973][train_inner][INFO] - {"epoch": 1, "update": 0.48, "loss": "1.335", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "103.135", "wer_total": "126.6", "n_error": "23.445", "ppl": "2.52", "accuracy": "81.465", "wer": "18.519", "wps": "69", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "72400", "lr": "0.00036338", "gnorm": "3.694", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "201806"} [2024-07-08 08:50:45,493][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 09:32:35,624][valid][INFO] - {"epoch": 1, "valid_loss": "1.094", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2553", "valid_wer_total": "18.1585", "valid_n_error": "2.90171", "valid_ppl": "2.13", "valid_accuracy": "84.012", "valid_wer": "15.98", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "72500", "valid_best_accuracy": "84.959"} [2024-07-08 09:32:35,625][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 72500 updates [2024-07-08 09:32:35,625][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_72500.pt [2024-07-08 09:32:38,815][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_72500.pt [2024-07-08 09:32:40,902][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_72500.pt (epoch 1 @ 72500 updates, score 84.012) (writing took 5.277265743992757 seconds) [2024-07-08 09:35:44,321][train_inner][INFO] - {"epoch": 1, "update": 0.481, "loss": "1.283", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "103.785", "wer_total": "126.605", "n_error": "22.8", "ppl": "2.43", "accuracy": "81.975", "wer": "18.009", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "72600", "lr": "0.00036437", "gnorm": "3.572", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "204688"} [2024-07-08 09:41:51,124][train_inner][INFO] - {"epoch": 1, "update": 0.483, "loss": "1.236", "ntokens": "126.91", "acc_total": "126.91", "n_correct": "104.64", "wer_total": "126.91", "n_error": "22.265", "ppl": "2.36", "accuracy": "82.452", "wer": "17.544", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "72800", "lr": "0.00036536", "gnorm": "3.436", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "205055"} [2024-07-08 09:47:58,217][train_inner][INFO] - {"epoch": 1, "update": 0.484, "loss": "1.283", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "103.8", "wer_total": "126.53", "n_error": "22.72", "ppl": "2.43", "accuracy": "82.036", "wer": "17.956", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "73000", "lr": "0.00036635", "gnorm": "3.546", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "205422"} [2024-07-08 09:54:05,234][train_inner][INFO] - {"epoch": 1, "update": 0.485, "loss": "1.311", "ntokens": "127.73", "acc_total": "127.73", "n_correct": "104.735", "wer_total": "127.73", "n_error": "22.99", "ppl": "2.48", "accuracy": "81.997", "wer": "17.999", "wps": "69.6", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "73200", "lr": "0.00036734", "gnorm": "3.744", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "205789"} [2024-07-08 10:00:12,756][train_inner][INFO] - {"epoch": 1, "update": 0.487, "loss": "1.316", "ntokens": "125.965", "acc_total": "125.965", "n_correct": "102.7", "wer_total": "125.965", "n_error": "23.26", "ppl": "2.49", "accuracy": "81.531", "wer": "18.465", "wps": "68.5", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "73400", "lr": "0.00036833", "gnorm": "3.768", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "206157"} [2024-07-08 10:06:20,018][train_inner][INFO] - {"epoch": 1, "update": 0.488, "loss": "1.293", "ntokens": "127.28", "acc_total": "127.28", "n_correct": "103.94", "wer_total": "127.28", "n_error": "23.325", "ppl": "2.45", "accuracy": "81.662", "wer": "18.326", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "73600", "lr": "0.00036932", "gnorm": "3.613", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "206524"} [2024-07-08 10:12:27,003][train_inner][INFO] - {"epoch": 1, "update": 0.489, "loss": "1.306", "ntokens": "126.765", "acc_total": "126.765", "n_correct": "103.315", "wer_total": "126.765", "n_error": "23.435", "ppl": "2.47", "accuracy": "81.501", "wer": "18.487", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "73800", "lr": "0.00037031", "gnorm": "3.497", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "206891"} [2024-07-08 10:14:38,873][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-08 10:18:35,748][train_inner][INFO] - {"epoch": 1, "update": 0.491, "loss": "1.3", "ntokens": "127.565", "acc_total": "127.565", "n_correct": "104.775", "wer_total": "127.565", "n_error": "22.785", "ppl": "2.46", "accuracy": "82.135", "wer": "17.861", "wps": "69.2", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "74000", "lr": "0.0003713", "gnorm": "3.59", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "207260"} [2024-07-08 10:24:42,461][train_inner][INFO] - {"epoch": 1, "update": 0.492, "loss": "1.29", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "103.525", "wer_total": "126.305", "n_error": "22.765", "ppl": "2.45", "accuracy": "81.964", "wer": "18.024", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "74200", "lr": "0.00037229", "gnorm": "3.331", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "207627"} [2024-07-08 10:24:44,219][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-08 10:30:51,231][train_inner][INFO] - {"epoch": 1, "update": 0.493, "loss": "1.297", "ntokens": "127.255", "acc_total": "127.255", "n_correct": "104.115", "wer_total": "127.255", "n_error": "23.12", "ppl": "2.46", "accuracy": "81.816", "wer": "18.168", "wps": "69", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "74400", "lr": "0.00037328", "gnorm": "3.655", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "207995"} [2024-07-08 10:36:58,037][train_inner][INFO] - {"epoch": 1, "update": 0.495, "loss": "1.289", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "104.395", "wer_total": "127.42", "n_error": "23.005", "ppl": "2.44", "accuracy": "81.93", "wer": "18.054", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "74600", "lr": "0.00037427", "gnorm": "3.508", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "208362"} [2024-07-08 10:43:04,594][train_inner][INFO] - {"epoch": 1, "update": 0.496, "loss": "1.257", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "104.505", "wer_total": "127.32", "n_error": "22.81", "ppl": "2.39", "accuracy": "82.081", "wer": "17.915", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "74800", "lr": "0.00037526", "gnorm": "3.603", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "208729"} [2024-07-08 10:49:11,225][train_inner][INFO] - {"epoch": 1, "update": 0.497, "loss": "1.312", "ntokens": "128.02", "acc_total": "128.02", "n_correct": "104.865", "wer_total": "128.02", "n_error": "23.15", "ppl": "2.48", "accuracy": "81.913", "wer": "18.083", "wps": "69.8", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "75000", "lr": "0.00037625", "gnorm": "3.756", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "209095"} [2024-07-08 10:49:11,225][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 11:31:02,878][valid][INFO] - {"epoch": 1, "valid_loss": "1.059", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3418", "valid_wer_total": "18.1585", "valid_n_error": "2.81517", "valid_ppl": "2.08", "valid_accuracy": "84.488", "valid_wer": "15.503", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "75000", "valid_best_accuracy": "84.959"} [2024-07-08 11:31:02,879][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 75000 updates [2024-07-08 11:31:02,879][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_75000.pt [2024-07-08 11:31:06,054][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_75000.pt [2024-07-08 11:31:08,172][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_75000.pt (epoch 1 @ 75000 updates, score 84.488) (writing took 5.292802530049812 seconds) [2024-07-08 11:37:14,869][train_inner][INFO] - {"epoch": 1, "update": 0.499, "loss": "1.344", "ntokens": "127.59", "acc_total": "127.59", "n_correct": "104.035", "wer_total": "127.59", "n_error": "23.54", "ppl": "2.54", "accuracy": "81.539", "wer": "18.45", "wps": "8.8", "ups": "0.07", "wpb": "127.6", "bsz": "8", "num_updates": "75200", "lr": "0.00037724", "gnorm": "3.592", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "211979"} [2024-07-08 11:43:21,724][train_inner][INFO] - {"epoch": 1, "update": 0.5, "loss": "1.268", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "103.895", "wer_total": "126.325", "n_error": "22.425", "ppl": "2.41", "accuracy": "82.244", "wer": "17.752", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "75400", "lr": "0.00037823", "gnorm": "3.651", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "212346"} [2024-07-08 11:49:28,658][train_inner][INFO] - {"epoch": 1, "update": 0.501, "loss": "1.283", "ntokens": "127.69", "acc_total": "127.69", "n_correct": "104.795", "wer_total": "127.69", "n_error": "22.88", "ppl": "2.43", "accuracy": "82.07", "wer": "17.918", "wps": "69.6", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "75600", "lr": "0.00037922", "gnorm": "3.516", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "212713"} [2024-07-08 11:55:36,424][train_inner][INFO] - {"epoch": 1, "update": 0.503, "loss": "1.303", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "103.59", "wer_total": "127.09", "n_error": "23.49", "ppl": "2.47", "accuracy": "81.509", "wer": "18.483", "wps": "69.1", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "75800", "lr": "0.00038021", "gnorm": "3.373", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "213080"} [2024-07-08 12:01:43,706][train_inner][INFO] - {"epoch": 1, "update": 0.504, "loss": "1.312", "ntokens": "128.265", "acc_total": "128.265", "n_correct": "104.47", "wer_total": "128.265", "n_error": "23.785", "ppl": "2.48", "accuracy": "81.449", "wer": "18.544", "wps": "69.8", "ups": "0.54", "wpb": "128.3", "bsz": "8", "num_updates": "76000", "lr": "0.0003812", "gnorm": "3.56", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "213448"} [2024-07-08 12:07:50,460][train_inner][INFO] - {"epoch": 1, "update": 0.505, "loss": "1.306", "ntokens": "126.285", "acc_total": "126.285", "n_correct": "103.115", "wer_total": "126.285", "n_error": "23.16", "ppl": "2.47", "accuracy": "81.653", "wer": "18.339", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "76200", "lr": "0.00038219", "gnorm": "3.519", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "213815"} [2024-07-08 12:13:57,765][train_inner][INFO] - {"epoch": 1, "update": 0.507, "loss": "1.3", "ntokens": "128.995", "acc_total": "128.995", "n_correct": "105.79", "wer_total": "128.995", "n_error": "23.185", "ppl": "2.46", "accuracy": "82.011", "wer": "17.974", "wps": "70.2", "ups": "0.54", "wpb": "129", "bsz": "8", "num_updates": "76400", "lr": "0.00038318", "gnorm": "3.662", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "214182"} [2024-07-08 12:20:04,498][train_inner][INFO] - {"epoch": 1, "update": 0.508, "loss": "1.3", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "103.865", "wer_total": "127.055", "n_error": "23.18", "ppl": "2.46", "accuracy": "81.748", "wer": "18.244", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "76600", "lr": "0.00038417", "gnorm": "3.548", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "214549"} [2024-07-08 12:26:11,139][train_inner][INFO] - {"epoch": 1, "update": 0.509, "loss": "1.311", "ntokens": "126.44", "acc_total": "126.44", "n_correct": "102.98", "wer_total": "126.44", "n_error": "23.455", "ppl": "2.48", "accuracy": "81.446", "wer": "18.55", "wps": "69", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "76800", "lr": "0.00038516", "gnorm": "3.732", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "214915"} [2024-07-08 12:32:18,061][train_inner][INFO] - {"epoch": 1, "update": 0.511, "loss": "1.277", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "103.36", "wer_total": "126.46", "n_error": "23.095", "ppl": "2.42", "accuracy": "81.733", "wer": "18.263", "wps": "68.9", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "77000", "lr": "0.00038615", "gnorm": "3.515", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "215282"} [2024-07-08 12:38:25,179][train_inner][INFO] - {"epoch": 1, "update": 0.512, "loss": "1.232", "ntokens": "126.7", "acc_total": "126.7", "n_correct": "104.76", "wer_total": "126.7", "n_error": "21.91", "ppl": "2.35", "accuracy": "82.684", "wer": "17.293", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "77200", "lr": "0.00038714", "gnorm": "3.461", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "215649"} [2024-07-08 12:44:32,289][train_inner][INFO] - {"epoch": 1, "update": 0.513, "loss": "1.336", "ntokens": "126.25", "acc_total": "126.25", "n_correct": "102.395", "wer_total": "126.25", "n_error": "23.835", "ppl": "2.52", "accuracy": "81.105", "wer": "18.879", "wps": "68.8", "ups": "0.54", "wpb": "126.2", "bsz": "8", "num_updates": "77400", "lr": "0.00038813", "gnorm": "3.55", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "216016"} [2024-07-08 12:47:35,828][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 13:29:29,039][valid][INFO] - {"epoch": 1, "valid_loss": "1.065", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3414", "valid_wer_total": "18.1585", "valid_n_error": "2.81613", "valid_ppl": "2.09", "valid_accuracy": "84.486", "valid_wer": "15.509", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "77500", "valid_best_accuracy": "84.959"} [2024-07-08 13:29:29,040][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 77500 updates [2024-07-08 13:29:29,040][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_77500.pt [2024-07-08 13:29:32,229][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_77500.pt [2024-07-08 13:29:34,362][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_77500.pt (epoch 1 @ 77500 updates, score 84.486) (writing took 5.322088117012754 seconds) [2024-07-08 13:32:37,582][train_inner][INFO] - {"epoch": 1, "update": 0.515, "loss": "1.329", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "103.605", "wer_total": "127.075", "n_error": "23.465", "ppl": "2.51", "accuracy": "81.531", "wer": "18.465", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "77600", "lr": "0.00038912", "gnorm": "3.525", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "218902"} [2024-07-08 13:38:45,136][train_inner][INFO] - {"epoch": 1, "update": 0.516, "loss": "1.311", "ntokens": "126.32", "acc_total": "126.32", "n_correct": "103.15", "wer_total": "126.32", "n_error": "23.155", "ppl": "2.48", "accuracy": "81.658", "wer": "18.33", "wps": "68.7", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "77800", "lr": "0.00039011", "gnorm": "3.68", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "219269"} [2024-07-08 13:44:52,605][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "1.253", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "104.35", "wer_total": "127.615", "n_error": "23.25", "ppl": "2.38", "accuracy": "81.769", "wer": "18.219", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "78000", "lr": "0.0003911", "gnorm": "3.702", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "219637"} [2024-07-08 13:50:59,761][train_inner][INFO] - {"epoch": 1, "update": 0.519, "loss": "1.253", "ntokens": "125.215", "acc_total": "125.215", "n_correct": "103.625", "wer_total": "125.215", "n_error": "21.585", "ppl": "2.38", "accuracy": "82.758", "wer": "17.238", "wps": "68.2", "ups": "0.54", "wpb": "125.2", "bsz": "8", "num_updates": "78200", "lr": "0.00039209", "gnorm": "3.254", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "220004"} [2024-07-08 13:57:07,035][train_inner][INFO] - {"epoch": 1, "update": 0.52, "loss": "1.282", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "103.755", "wer_total": "126.58", "n_error": "22.825", "ppl": "2.43", "accuracy": "81.968", "wer": "18.032", "wps": "68.9", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "78400", "lr": "0.00039308", "gnorm": "3.49", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "220371"} [2024-07-08 14:03:14,392][train_inner][INFO] - {"epoch": 1, "update": 0.521, "loss": "1.325", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "103.35", "wer_total": "126.725", "n_error": "23.36", "ppl": "2.5", "accuracy": "81.555", "wer": "18.434", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "78600", "lr": "0.00039407", "gnorm": "3.534", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "220738"} [2024-07-08 14:09:22,172][train_inner][INFO] - {"epoch": 1, "update": 0.523, "loss": "1.289", "ntokens": "126.455", "acc_total": "126.455", "n_correct": "103.45", "wer_total": "126.455", "n_error": "22.99", "ppl": "2.44", "accuracy": "81.808", "wer": "18.18", "wps": "68.8", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "78800", "lr": "0.00039506", "gnorm": "3.528", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "221106"} [2024-07-08 14:09:49,724][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-08 14:15:31,693][train_inner][INFO] - {"epoch": 1, "update": 0.524, "loss": "1.219", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "104.48", "wer_total": "126.18", "n_error": "21.69", "ppl": "2.33", "accuracy": "82.802", "wer": "17.19", "wps": "68.3", "ups": "0.54", "wpb": "126.2", "bsz": "8", "num_updates": "79000", "lr": "0.00039605", "gnorm": "3.51", "loss_scale": "1024", "train_wall": "369", "gb_free": "6.5", "wall": "221476"} [2024-07-08 14:21:39,619][train_inner][INFO] - {"epoch": 1, "update": 0.525, "loss": "1.249", "ntokens": "127.86", "acc_total": "127.86", "n_correct": "105.22", "wer_total": "127.86", "n_error": "22.635", "ppl": "2.38", "accuracy": "82.293", "wer": "17.703", "wps": "69.5", "ups": "0.54", "wpb": "127.9", "bsz": "8", "num_updates": "79200", "lr": "0.00039704", "gnorm": "3.388", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "221844"} [2024-07-08 14:27:47,149][train_inner][INFO] - {"epoch": 1, "update": 0.527, "loss": "1.297", "ntokens": "125.77", "acc_total": "125.77", "n_correct": "102.785", "wer_total": "125.77", "n_error": "22.96", "ppl": "2.46", "accuracy": "81.725", "wer": "18.256", "wps": "68.4", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "79400", "lr": "0.00039803", "gnorm": "3.726", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "222211"} [2024-07-08 14:33:54,713][train_inner][INFO] - {"epoch": 1, "update": 0.528, "loss": "1.303", "ntokens": "126.72", "acc_total": "126.72", "n_correct": "103.875", "wer_total": "126.72", "n_error": "22.835", "ppl": "2.47", "accuracy": "81.972", "wer": "18.02", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "79600", "lr": "0.00039902", "gnorm": "3.668", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "222579"} [2024-07-08 14:40:02,091][train_inner][INFO] - {"epoch": 1, "update": 0.529, "loss": "1.308", "ntokens": "126.71", "acc_total": "126.71", "n_correct": "103.24", "wer_total": "126.71", "n_error": "23.47", "ppl": "2.48", "accuracy": "81.477", "wer": "18.523", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "79800", "lr": "0.00040001", "gnorm": "3.54", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "222946"} [2024-07-08 14:46:09,437][train_inner][INFO] - {"epoch": 1, "update": 0.531, "loss": "1.354", "ntokens": "125.78", "acc_total": "125.78", "n_correct": "101.84", "wer_total": "125.78", "n_error": "23.925", "ppl": "2.56", "accuracy": "80.967", "wer": "19.021", "wps": "68.5", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "80000", "lr": "0.000401", "gnorm": "3.804", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "223313"} [2024-07-08 14:46:09,437][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 15:28:07,764][valid][INFO] - {"epoch": 1, "valid_loss": "1.102", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2021", "valid_wer_total": "18.1585", "valid_n_error": "2.95506", "valid_ppl": "2.15", "valid_accuracy": "83.719", "valid_wer": "16.274", "valid_wps": "173", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "80000", "valid_best_accuracy": "84.959"} [2024-07-08 15:28:07,765][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 80000 updates [2024-07-08 15:28:07,765][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_80000.pt [2024-07-08 15:28:10,973][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_80000.pt [2024-07-08 15:28:13,072][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_80000.pt (epoch 1 @ 80000 updates, score 83.719) (writing took 5.307779273018241 seconds) [2024-07-08 15:34:20,222][train_inner][INFO] - {"epoch": 1, "update": 0.532, "loss": "1.342", "ntokens": "127.105", "acc_total": "127.105", "n_correct": "103.065", "wer_total": "127.105", "n_error": "24.03", "ppl": "2.53", "accuracy": "81.087", "wer": "18.906", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "80200", "lr": "0.00040199", "gnorm": "3.72", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "226204"} [2024-07-08 15:40:27,186][train_inner][INFO] - {"epoch": 1, "update": 0.533, "loss": "1.309", "ntokens": "125.415", "acc_total": "125.415", "n_correct": "102.315", "wer_total": "125.415", "n_error": "23.075", "ppl": "2.48", "accuracy": "81.581", "wer": "18.399", "wps": "68.4", "ups": "0.55", "wpb": "125.4", "bsz": "8", "num_updates": "80400", "lr": "0.00040298", "gnorm": "3.585", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "226571"} [2024-07-08 15:46:34,671][train_inner][INFO] - {"epoch": 1, "update": 0.534, "loss": "1.344", "ntokens": "127.585", "acc_total": "127.585", "n_correct": "103.74", "wer_total": "127.585", "n_error": "23.825", "ppl": "2.54", "accuracy": "81.31", "wer": "18.674", "wps": "69.4", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "80600", "lr": "0.00040397", "gnorm": "3.507", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "226939"} [2024-07-08 15:52:42,514][train_inner][INFO] - {"epoch": 1, "update": 0.536, "loss": "1.294", "ntokens": "127.7", "acc_total": "127.7", "n_correct": "104.54", "wer_total": "127.7", "n_error": "23.155", "ppl": "2.45", "accuracy": "81.864", "wer": "18.132", "wps": "69.4", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "80800", "lr": "0.00040496", "gnorm": "3.394", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "227307"} [2024-07-08 15:58:50,211][train_inner][INFO] - {"epoch": 1, "update": 0.537, "loss": "1.257", "ntokens": "126.895", "acc_total": "126.895", "n_correct": "104", "wer_total": "126.895", "n_error": "22.87", "ppl": "2.39", "accuracy": "81.958", "wer": "18.023", "wps": "69", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "81000", "lr": "0.00040595", "gnorm": "3.445", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "227674"} [2024-07-08 15:59:35,912][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-08 16:04:59,217][train_inner][INFO] - {"epoch": 1, "update": 0.538, "loss": "1.382", "ntokens": "126.07", "acc_total": "126.07", "n_correct": "102.265", "wer_total": "126.07", "n_error": "23.79", "ppl": "2.61", "accuracy": "81.118", "wer": "18.87", "wps": "68.3", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "81200", "lr": "0.00040694", "gnorm": "3.64", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "228043"} [2024-07-08 16:11:06,352][train_inner][INFO] - {"epoch": 1, "update": 0.54, "loss": "1.27", "ntokens": "126.06", "acc_total": "126.06", "n_correct": "103.415", "wer_total": "126.06", "n_error": "22.635", "ppl": "2.41", "accuracy": "82.036", "wer": "17.956", "wps": "68.7", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "81400", "lr": "0.00040793", "gnorm": "3.593", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "228410"} [2024-07-08 16:17:13,285][train_inner][INFO] - {"epoch": 1, "update": 0.541, "loss": "1.338", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "102.985", "wer_total": "126.86", "n_error": "23.87", "ppl": "2.53", "accuracy": "81.18", "wer": "18.816", "wps": "69.1", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "81600", "lr": "0.00040892", "gnorm": "3.65", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "228777"} [2024-07-08 16:23:20,208][train_inner][INFO] - {"epoch": 1, "update": 0.542, "loss": "1.335", "ntokens": "127.04", "acc_total": "127.04", "n_correct": "103.21", "wer_total": "127.04", "n_error": "23.815", "ppl": "2.52", "accuracy": "81.242", "wer": "18.746", "wps": "69.2", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "81800", "lr": "0.00040991", "gnorm": "3.714", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "229144"} [2024-07-08 16:29:27,192][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "1.308", "ntokens": "128.61", "acc_total": "128.61", "n_correct": "104.805", "wer_total": "128.61", "n_error": "23.785", "ppl": "2.48", "accuracy": "81.491", "wer": "18.494", "wps": "70.1", "ups": "0.54", "wpb": "128.6", "bsz": "8", "num_updates": "82000", "lr": "0.0004109", "gnorm": "3.504", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "229511"} [2024-07-08 16:35:34,162][train_inner][INFO] - {"epoch": 1, "update": 0.545, "loss": "1.245", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "104.495", "wer_total": "127.405", "n_error": "22.905", "ppl": "2.37", "accuracy": "82.018", "wer": "17.978", "wps": "69.4", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "82200", "lr": "0.00041189", "gnorm": "3.498", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "229878"} [2024-07-08 16:41:41,361][train_inner][INFO] - {"epoch": 1, "update": 0.546, "loss": "1.283", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "104.61", "wer_total": "127.595", "n_error": "22.97", "ppl": "2.43", "accuracy": "81.986", "wer": "18.002", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "82400", "lr": "0.00041288", "gnorm": "3.652", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "230245"} [2024-07-08 16:44:44,889][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 17:26:38,128][valid][INFO] - {"epoch": 1, "valid_loss": "1.044", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3975", "valid_wer_total": "18.1585", "valid_n_error": "2.75898", "valid_ppl": "2.06", "valid_accuracy": "84.795", "valid_wer": "15.194", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "82500", "valid_best_accuracy": "84.959"} [2024-07-08 17:26:38,129][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 82500 updates [2024-07-08 17:26:38,129][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_82500.pt [2024-07-08 17:26:41,304][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_82500.pt [2024-07-08 17:26:43,418][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_82500.pt (epoch 1 @ 82500 updates, score 84.795) (writing took 5.289314439985901 seconds) [2024-07-08 17:29:46,761][train_inner][INFO] - {"epoch": 1, "update": 0.548, "loss": "1.294", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "103.18", "wer_total": "126.56", "n_error": "23.35", "ppl": "2.45", "accuracy": "81.527", "wer": "18.45", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "82600", "lr": "0.00041387", "gnorm": "3.42", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "233131"} [2024-07-08 17:35:54,232][train_inner][INFO] - {"epoch": 1, "update": 0.549, "loss": "1.389", "ntokens": "127.445", "acc_total": "127.445", "n_correct": "103.13", "wer_total": "127.445", "n_error": "24.305", "ppl": "2.62", "accuracy": "80.921", "wer": "19.071", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "82800", "lr": "0.00041486", "gnorm": "3.854", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "233498"} [2024-07-08 17:42:01,210][train_inner][INFO] - {"epoch": 1, "update": 0.55, "loss": "1.289", "ntokens": "125.98", "acc_total": "125.98", "n_correct": "103.205", "wer_total": "125.98", "n_error": "22.76", "ppl": "2.44", "accuracy": "81.922", "wer": "18.066", "wps": "68.7", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "83000", "lr": "0.00041585", "gnorm": "3.751", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "233865"} [2024-07-08 17:48:08,536][train_inner][INFO] - {"epoch": 1, "update": 0.552, "loss": "1.38", "ntokens": "127.21", "acc_total": "127.21", "n_correct": "102.99", "wer_total": "127.21", "n_error": "24.21", "ppl": "2.6", "accuracy": "80.961", "wer": "19.032", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "83200", "lr": "0.00041684", "gnorm": "3.623", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "234233"} [2024-07-08 17:54:15,720][train_inner][INFO] - {"epoch": 1, "update": 0.553, "loss": "1.405", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "101.815", "wer_total": "126.835", "n_error": "24.995", "ppl": "2.65", "accuracy": "80.274", "wer": "19.707", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "83400", "lr": "0.00041783", "gnorm": "3.729", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "234600"} [2024-07-08 18:00:22,819][train_inner][INFO] - {"epoch": 1, "update": 0.554, "loss": "1.36", "ntokens": "126.1", "acc_total": "126.1", "n_correct": "102.2", "wer_total": "126.1", "n_error": "23.895", "ppl": "2.57", "accuracy": "81.047", "wer": "18.949", "wps": "68.7", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "83600", "lr": "0.00041882", "gnorm": "3.8", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "234967"} [2024-07-08 18:06:30,169][train_inner][INFO] - {"epoch": 1, "update": 0.556, "loss": "1.324", "ntokens": "126.525", "acc_total": "126.525", "n_correct": "103.015", "wer_total": "126.525", "n_error": "23.505", "ppl": "2.5", "accuracy": "81.419", "wer": "18.577", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "83800", "lr": "0.00041981", "gnorm": "3.829", "loss_scale": "2048", "train_wall": "367", "gb_free": "6.5", "wall": "235334"} [2024-07-08 18:12:37,217][train_inner][INFO] - {"epoch": 1, "update": 0.557, "loss": "1.349", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "102.515", "wer_total": "126.815", "n_error": "24.285", "ppl": "2.55", "accuracy": "80.838", "wer": "19.15", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "84000", "lr": "0.0004208", "gnorm": "3.625", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "235701"} [2024-07-08 18:16:10,021][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-08 18:18:46,008][train_inner][INFO] - {"epoch": 1, "update": 0.558, "loss": "1.32", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "103.185", "wer_total": "126.705", "n_error": "23.51", "ppl": "2.5", "accuracy": "81.437", "wer": "18.555", "wps": "68.7", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "84200", "lr": "0.00042179", "gnorm": "3.544", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "236070"} [2024-07-08 18:23:34,170][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-08 18:24:54,913][train_inner][INFO] - {"epoch": 1, "update": 0.56, "loss": "1.309", "ntokens": "127.035", "acc_total": "127.035", "n_correct": "103.08", "wer_total": "127.035", "n_error": "23.94", "ppl": "2.48", "accuracy": "81.143", "wer": "18.845", "wps": "68.9", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "84400", "lr": "0.00042278", "gnorm": "3.366", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "236439"} [2024-07-08 18:31:02,747][train_inner][INFO] - {"epoch": 1, "update": 0.561, "loss": "1.35", "ntokens": "126.73", "acc_total": "126.73", "n_correct": "102.545", "wer_total": "126.73", "n_error": "24.145", "ppl": "2.55", "accuracy": "80.916", "wer": "19.052", "wps": "68.9", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "84600", "lr": "0.00042377", "gnorm": "3.603", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "236807"} [2024-07-08 18:37:09,849][train_inner][INFO] - {"epoch": 1, "update": 0.562, "loss": "1.398", "ntokens": "127.505", "acc_total": "127.505", "n_correct": "102.895", "wer_total": "127.505", "n_error": "24.6", "ppl": "2.63", "accuracy": "80.699", "wer": "19.293", "wps": "69.5", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "84800", "lr": "0.00042476", "gnorm": "3.614", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "237174"} [2024-07-08 18:43:17,410][train_inner][INFO] - {"epoch": 1, "update": 0.564, "loss": "1.31", "ntokens": "127.68", "acc_total": "127.68", "n_correct": "103.9", "wer_total": "127.68", "n_error": "23.77", "ppl": "2.48", "accuracy": "81.375", "wer": "18.617", "wps": "69.5", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "85000", "lr": "0.00042575", "gnorm": "3.425", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "237541"} [2024-07-08 18:43:17,411][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 19:25:07,232][valid][INFO] - {"epoch": 1, "valid_loss": "1.056", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3621", "valid_wer_total": "18.1585", "valid_n_error": "2.79491", "valid_ppl": "2.08", "valid_accuracy": "84.6", "valid_wer": "15.392", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "85000", "valid_best_accuracy": "84.959"} [2024-07-08 19:25:07,233][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 85000 updates [2024-07-08 19:25:07,233][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_85000.pt [2024-07-08 19:25:10,399][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_85000.pt [2024-07-08 19:25:12,528][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_85000.pt (epoch 1 @ 85000 updates, score 84.6) (writing took 5.295092003012542 seconds) [2024-07-08 19:31:19,508][train_inner][INFO] - {"epoch": 1, "update": 0.565, "loss": "1.31", "ntokens": "126.185", "acc_total": "126.185", "n_correct": "102.895", "wer_total": "126.185", "n_error": "23.27", "ppl": "2.48", "accuracy": "81.543", "wer": "18.441", "wps": "8.8", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "85200", "lr": "0.00042674", "gnorm": "3.636", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "240424"} [2024-07-08 19:37:26,641][train_inner][INFO] - {"epoch": 1, "update": 0.566, "loss": "1.349", "ntokens": "127.59", "acc_total": "127.59", "n_correct": "103.72", "wer_total": "127.59", "n_error": "23.86", "ppl": "2.55", "accuracy": "81.292", "wer": "18.701", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "85400", "lr": "0.00042773", "gnorm": "3.912", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "240791"} [2024-07-08 19:43:33,306][train_inner][INFO] - {"epoch": 1, "update": 0.568, "loss": "1.301", "ntokens": "126.415", "acc_total": "126.415", "n_correct": "103.1", "wer_total": "126.415", "n_error": "23.295", "ppl": "2.46", "accuracy": "81.557", "wer": "18.427", "wps": "69", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "85600", "lr": "0.00042872", "gnorm": "3.597", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "241157"} [2024-07-08 19:49:40,013][train_inner][INFO] - {"epoch": 1, "update": 0.569, "loss": "1.355", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "102.42", "wer_total": "126.935", "n_error": "24.5", "ppl": "2.56", "accuracy": "80.687", "wer": "19.301", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "85800", "lr": "0.00042971", "gnorm": "3.639", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "241524"} [2024-07-08 19:55:47,496][train_inner][INFO] - {"epoch": 1, "update": 0.57, "loss": "1.336", "ntokens": "125.845", "acc_total": "125.845", "n_correct": "101.88", "wer_total": "125.845", "n_error": "23.955", "ppl": "2.53", "accuracy": "80.957", "wer": "19.035", "wps": "68.5", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "86000", "lr": "0.0004307", "gnorm": "3.618", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "241892"} [2024-07-08 20:01:54,287][train_inner][INFO] - {"epoch": 1, "update": 0.572, "loss": "1.299", "ntokens": "126.89", "acc_total": "126.89", "n_correct": "103.635", "wer_total": "126.89", "n_error": "23.245", "ppl": "2.46", "accuracy": "81.673", "wer": "18.319", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "86200", "lr": "0.00043169", "gnorm": "3.562", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "242258"} [2024-07-08 20:08:00,878][train_inner][INFO] - {"epoch": 1, "update": 0.573, "loss": "1.358", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "102.99", "wer_total": "127.375", "n_error": "24.36", "ppl": "2.56", "accuracy": "80.856", "wer": "19.125", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "86400", "lr": "0.00043268", "gnorm": "3.474", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "242625"} [2024-07-08 20:14:07,675][train_inner][INFO] - {"epoch": 1, "update": 0.574, "loss": "1.325", "ntokens": "127.415", "acc_total": "127.415", "n_correct": "103.585", "wer_total": "127.415", "n_error": "23.825", "ppl": "2.51", "accuracy": "81.297", "wer": "18.699", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "86600", "lr": "0.00043367", "gnorm": "3.845", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "242992"} [2024-07-08 20:20:14,516][train_inner][INFO] - {"epoch": 1, "update": 0.576, "loss": "1.35", "ntokens": "126.55", "acc_total": "126.55", "n_correct": "102.36", "wer_total": "126.55", "n_error": "24.175", "ppl": "2.55", "accuracy": "80.885", "wer": "19.103", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "86800", "lr": "0.00043466", "gnorm": "3.607", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "243359"} [2024-07-08 20:26:21,390][train_inner][INFO] - {"epoch": 1, "update": 0.577, "loss": "1.379", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "102.62", "wer_total": "126.785", "n_error": "24.15", "ppl": "2.6", "accuracy": "80.94", "wer": "19.048", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "87000", "lr": "0.00043565", "gnorm": "3.564", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "243725"} [2024-07-08 20:32:28,768][train_inner][INFO] - {"epoch": 1, "update": 0.578, "loss": "1.333", "ntokens": "126.13", "acc_total": "126.13", "n_correct": "102.53", "wer_total": "126.13", "n_error": "23.6", "ppl": "2.52", "accuracy": "81.289", "wer": "18.711", "wps": "68.7", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "87200", "lr": "0.00043664", "gnorm": "3.627", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "244093"} [2024-07-08 20:38:35,739][train_inner][INFO] - {"epoch": 1, "update": 0.58, "loss": "1.382", "ntokens": "125.97", "acc_total": "125.97", "n_correct": "101.495", "wer_total": "125.97", "n_error": "24.47", "ppl": "2.61", "accuracy": "80.571", "wer": "19.425", "wps": "68.7", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "87400", "lr": "0.00043763", "gnorm": "3.971", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "244460"} [2024-07-08 20:41:39,096][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 21:23:30,010][valid][INFO] - {"epoch": 1, "valid_loss": "1.109", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2313", "valid_wer_total": "18.1585", "valid_n_error": "2.92634", "valid_ppl": "2.16", "valid_accuracy": "83.88", "valid_wer": "16.116", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "87500", "valid_best_accuracy": "84.959"} [2024-07-08 21:23:30,011][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 87500 updates [2024-07-08 21:23:30,011][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_87500.pt [2024-07-08 21:23:33,198][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_87500.pt [2024-07-08 21:23:35,134][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_87500.pt (epoch 1 @ 87500 updates, score 83.88) (writing took 5.123119310010225 seconds) [2024-07-08 21:26:38,416][train_inner][INFO] - {"epoch": 1, "update": 0.581, "loss": "1.31", "ntokens": "127.455", "acc_total": "127.455", "n_correct": "103.605", "wer_total": "127.455", "n_error": "23.845", "ppl": "2.48", "accuracy": "81.288", "wer": "18.709", "wps": "8.8", "ups": "0.07", "wpb": "127.5", "bsz": "8", "num_updates": "87600", "lr": "0.00043862", "gnorm": "3.645", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "247342"} [2024-07-08 21:32:45,262][train_inner][INFO] - {"epoch": 1, "update": 0.582, "loss": "1.32", "ntokens": "127.14", "acc_total": "127.14", "n_correct": "103.17", "wer_total": "127.14", "n_error": "23.955", "ppl": "2.5", "accuracy": "81.147", "wer": "18.841", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "87800", "lr": "0.00043961", "gnorm": "3.411", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "247709"} [2024-07-08 21:38:51,877][train_inner][INFO] - {"epoch": 1, "update": 0.584, "loss": "1.318", "ntokens": "125.935", "acc_total": "125.935", "n_correct": "102.48", "wer_total": "125.935", "n_error": "23.42", "ppl": "2.49", "accuracy": "81.375", "wer": "18.597", "wps": "68.7", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "88000", "lr": "0.0004406", "gnorm": "3.671", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "248076"} [2024-07-08 21:44:58,641][train_inner][INFO] - {"epoch": 1, "update": 0.585, "loss": "1.303", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "103.76", "wer_total": "126.905", "n_error": "23.13", "ppl": "2.47", "accuracy": "81.762", "wer": "18.226", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "88200", "lr": "0.00044159", "gnorm": "3.639", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "248443"} [2024-07-08 21:51:05,560][train_inner][INFO] - {"epoch": 1, "update": 0.586, "loss": "1.355", "ntokens": "126.535", "acc_total": "126.535", "n_correct": "102.43", "wer_total": "126.535", "n_error": "24.085", "ppl": "2.56", "accuracy": "80.95", "wer": "19.034", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "88400", "lr": "0.00044258", "gnorm": "3.719", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "248810"} [2024-07-08 21:57:12,358][train_inner][INFO] - {"epoch": 1, "update": 0.588, "loss": "1.392", "ntokens": "127.14", "acc_total": "127.14", "n_correct": "102.055", "wer_total": "127.14", "n_error": "25.07", "ppl": "2.63", "accuracy": "80.27", "wer": "19.718", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "88600", "lr": "0.00044357", "gnorm": "3.633", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "249176"} [2024-07-08 22:03:18,917][train_inner][INFO] - {"epoch": 1, "update": 0.589, "loss": "1.328", "ntokens": "125.78", "acc_total": "125.78", "n_correct": "102.235", "wer_total": "125.78", "n_error": "23.515", "ppl": "2.51", "accuracy": "81.281", "wer": "18.695", "wps": "68.6", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "88800", "lr": "0.00044456", "gnorm": "3.619", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "249543"} [2024-07-08 22:06:38,775][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-08 22:09:27,589][train_inner][INFO] - {"epoch": 1, "update": 0.59, "loss": "1.372", "ntokens": "128.01", "acc_total": "128.01", "n_correct": "103.17", "wer_total": "128.01", "n_error": "24.825", "ppl": "2.59", "accuracy": "80.595", "wer": "19.393", "wps": "69.4", "ups": "0.54", "wpb": "128", "bsz": "8", "num_updates": "89000", "lr": "0.00044555", "gnorm": "3.746", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "249912"} [2024-07-08 22:15:34,171][train_inner][INFO] - {"epoch": 1, "update": 0.592, "loss": "1.37", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "102.895", "wer_total": "127.35", "n_error": "24.445", "ppl": "2.59", "accuracy": "80.797", "wer": "19.195", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "89200", "lr": "0.00044654", "gnorm": "3.783", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "250278"} [2024-07-08 22:21:41,048][train_inner][INFO] - {"epoch": 1, "update": 0.593, "loss": "1.322", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "103.195", "wer_total": "127.145", "n_error": "23.94", "ppl": "2.5", "accuracy": "81.163", "wer": "18.829", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "89400", "lr": "0.00044753", "gnorm": "3.506", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "250645"} [2024-07-08 22:27:47,928][train_inner][INFO] - {"epoch": 1, "update": 0.594, "loss": "1.299", "ntokens": "127.16", "acc_total": "127.16", "n_correct": "103.7", "wer_total": "127.16", "n_error": "23.455", "ppl": "2.46", "accuracy": "81.551", "wer": "18.445", "wps": "69.3", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "89600", "lr": "0.00044852", "gnorm": "3.476", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "251012"} [2024-07-08 22:33:54,205][train_inner][INFO] - {"epoch": 1, "update": 0.596, "loss": "1.401", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "102.37", "wer_total": "127.03", "n_error": "24.64", "ppl": "2.64", "accuracy": "80.587", "wer": "19.397", "wps": "69.4", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "89800", "lr": "0.00044951", "gnorm": "3.737", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "251378"} [2024-07-08 22:40:01,018][train_inner][INFO] - {"epoch": 1, "update": 0.597, "loss": "1.337", "ntokens": "126.415", "acc_total": "126.415", "n_correct": "102.43", "wer_total": "126.415", "n_error": "23.98", "ppl": "2.53", "accuracy": "81.027", "wer": "18.969", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "90000", "lr": "0.0004505", "gnorm": "3.798", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "251745"} [2024-07-08 22:40:01,018][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-08 23:21:49,839][valid][INFO] - {"epoch": 1, "valid_loss": "1.13", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.1877", "valid_wer_total": "18.1585", "valid_n_error": "2.96907", "valid_ppl": "2.19", "valid_accuracy": "83.64", "valid_wer": "16.351", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "90000", "valid_best_accuracy": "84.959"} [2024-07-08 23:21:49,840][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 90000 updates [2024-07-08 23:21:49,840][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_90000.pt [2024-07-08 23:21:53,015][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_90000.pt [2024-07-08 23:21:55,138][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_90000.pt (epoch 1 @ 90000 updates, score 83.64) (writing took 5.298082134977449 seconds) [2024-07-08 23:28:01,466][train_inner][INFO] - {"epoch": 1, "update": 0.598, "loss": "1.278", "ntokens": "126.42", "acc_total": "126.42", "n_correct": "103.25", "wer_total": "126.42", "n_error": "23.16", "ppl": "2.42", "accuracy": "81.672", "wer": "18.32", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "90200", "lr": "0.00045149", "gnorm": "3.639", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "254626"} [2024-07-08 23:34:08,263][train_inner][INFO] - {"epoch": 1, "update": 0.599, "loss": "1.283", "ntokens": "126.37", "acc_total": "126.37", "n_correct": "103.44", "wer_total": "126.37", "n_error": "22.915", "ppl": "2.43", "accuracy": "81.855", "wer": "18.133", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "90400", "lr": "0.00045248", "gnorm": "3.606", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "254992"} [2024-07-08 23:40:14,846][train_inner][INFO] - {"epoch": 1, "update": 0.601, "loss": "1.307", "ntokens": "127.37", "acc_total": "127.37", "n_correct": "104.155", "wer_total": "127.37", "n_error": "23.2", "ppl": "2.47", "accuracy": "81.774", "wer": "18.215", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "90600", "lr": "0.00045347", "gnorm": "3.72", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "255359"} [2024-07-08 23:46:21,447][train_inner][INFO] - {"epoch": 1, "update": 0.602, "loss": "1.381", "ntokens": "127.575", "acc_total": "127.575", "n_correct": "102.92", "wer_total": "127.575", "n_error": "24.635", "ppl": "2.6", "accuracy": "80.674", "wer": "19.31", "wps": "69.6", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "90800", "lr": "0.00045446", "gnorm": "3.686", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "255726"} [2024-07-08 23:46:41,524][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-08 23:52:29,739][train_inner][INFO] - {"epoch": 1, "update": 0.603, "loss": "1.403", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "101.415", "wer_total": "126.21", "n_error": "24.775", "ppl": "2.65", "accuracy": "80.354", "wer": "19.63", "wps": "68.5", "ups": "0.54", "wpb": "126.2", "bsz": "8", "num_updates": "91000", "lr": "0.00045545", "gnorm": "3.836", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "256094"} [2024-07-08 23:58:37,700][train_inner][INFO] - {"epoch": 1, "update": 0.605, "loss": "1.322", "ntokens": "126.37", "acc_total": "126.37", "n_correct": "102.295", "wer_total": "126.37", "n_error": "24.065", "ppl": "2.5", "accuracy": "80.949", "wer": "19.043", "wps": "68.7", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "91200", "lr": "0.00045644", "gnorm": "3.58", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "256462"} [2024-07-09 00:04:45,052][train_inner][INFO] - {"epoch": 1, "update": 0.606, "loss": "1.347", "ntokens": "127.215", "acc_total": "127.215", "n_correct": "102.72", "wer_total": "127.215", "n_error": "24.49", "ppl": "2.54", "accuracy": "80.745", "wer": "19.251", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "91400", "lr": "0.00045743", "gnorm": "3.586", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "256829"} [2024-07-09 00:10:52,109][train_inner][INFO] - {"epoch": 1, "update": 0.607, "loss": "1.424", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "101.7", "wer_total": "127.03", "n_error": "25.325", "ppl": "2.68", "accuracy": "80.06", "wer": "19.936", "wps": "69.2", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "91600", "lr": "0.00045842", "gnorm": "3.741", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "257196"} [2024-07-09 00:15:18,084][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-09 00:17:00,564][train_inner][INFO] - {"epoch": 1, "update": 0.609, "loss": "1.369", "ntokens": "126.315", "acc_total": "126.315", "n_correct": "102.2", "wer_total": "126.315", "n_error": "24.085", "ppl": "2.58", "accuracy": "80.909", "wer": "19.067", "wps": "68.6", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "91800", "lr": "0.00045941", "gnorm": "3.701", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "257565"} [2024-07-09 00:23:07,238][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "1.346", "ntokens": "127.205", "acc_total": "127.205", "n_correct": "103.18", "wer_total": "127.205", "n_error": "24.02", "ppl": "2.54", "accuracy": "81.113", "wer": "18.883", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "92000", "lr": "0.0004604", "gnorm": "3.727", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "257931"} [2024-07-09 00:29:13,568][train_inner][INFO] - {"epoch": 1, "update": 0.611, "loss": "1.325", "ntokens": "127.255", "acc_total": "127.255", "n_correct": "103.68", "wer_total": "127.255", "n_error": "23.575", "ppl": "2.51", "accuracy": "81.474", "wer": "18.526", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "92200", "lr": "0.00046139", "gnorm": "3.623", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "258298"} [2024-07-09 00:35:20,177][train_inner][INFO] - {"epoch": 1, "update": 0.613, "loss": "1.385", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "101.76", "wer_total": "126.905", "n_error": "25.11", "ppl": "2.61", "accuracy": "80.186", "wer": "19.786", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "92400", "lr": "0.00046238", "gnorm": "3.716", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "258664"} [2024-07-09 00:38:23,477][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 01:20:13,900][valid][INFO] - {"epoch": 1, "valid_loss": "1.123", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.1791", "valid_wer_total": "18.1585", "valid_n_error": "2.97841", "valid_ppl": "2.18", "valid_accuracy": "83.592", "valid_wer": "16.402", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "92500", "valid_best_accuracy": "84.959"} [2024-07-09 01:20:13,900][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 92500 updates [2024-07-09 01:20:13,900][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_92500.pt [2024-07-09 01:20:17,095][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_92500.pt [2024-07-09 01:20:19,190][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_92500.pt (epoch 1 @ 92500 updates, score 83.592) (writing took 5.290069808019325 seconds) [2024-07-09 01:23:22,464][train_inner][INFO] - {"epoch": 1, "update": 0.614, "loss": "1.317", "ntokens": "127.13", "acc_total": "127.13", "n_correct": "103.655", "wer_total": "127.13", "n_error": "23.46", "ppl": "2.49", "accuracy": "81.535", "wer": "18.454", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "92600", "lr": "0.00046337", "gnorm": "3.584", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "261547"} [2024-07-09 01:29:29,126][train_inner][INFO] - {"epoch": 1, "update": 0.615, "loss": "1.384", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "101.81", "wer_total": "126.405", "n_error": "24.585", "ppl": "2.61", "accuracy": "80.543", "wer": "19.449", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "92800", "lr": "0.00046436", "gnorm": "4.077", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "261913"} [2024-07-09 01:35:35,689][train_inner][INFO] - {"epoch": 1, "update": 0.617, "loss": "1.36", "ntokens": "126.55", "acc_total": "126.55", "n_correct": "102.22", "wer_total": "126.55", "n_error": "24.315", "ppl": "2.57", "accuracy": "80.774", "wer": "19.214", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "93000", "lr": "0.00046535", "gnorm": "3.582", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "262280"} [2024-07-09 01:41:42,175][train_inner][INFO] - {"epoch": 1, "update": 0.618, "loss": "1.369", "ntokens": "126.85", "acc_total": "126.85", "n_correct": "102.265", "wer_total": "126.85", "n_error": "24.575", "ppl": "2.58", "accuracy": "80.619", "wer": "19.373", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "93200", "lr": "0.00046634", "gnorm": "3.753", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "262646"} [2024-07-09 01:47:48,763][train_inner][INFO] - {"epoch": 1, "update": 0.619, "loss": "1.399", "ntokens": "127.04", "acc_total": "127.04", "n_correct": "102.32", "wer_total": "127.04", "n_error": "24.71", "ppl": "2.64", "accuracy": "80.542", "wer": "19.451", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "93400", "lr": "0.00046733", "gnorm": "3.721", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "263013"} [2024-07-09 01:53:55,322][train_inner][INFO] - {"epoch": 1, "update": 0.621, "loss": "1.375", "ntokens": "126.03", "acc_total": "126.03", "n_correct": "102.21", "wer_total": "126.03", "n_error": "23.8", "ppl": "2.59", "accuracy": "81.1", "wer": "18.884", "wps": "68.8", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "93600", "lr": "0.00046832", "gnorm": "3.829", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "263379"} [2024-07-09 02:00:02,245][train_inner][INFO] - {"epoch": 1, "update": 0.622, "loss": "1.362", "ntokens": "126.73", "acc_total": "126.73", "n_correct": "102.45", "wer_total": "126.73", "n_error": "24.275", "ppl": "2.57", "accuracy": "80.841", "wer": "19.155", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "93800", "lr": "0.00046931", "gnorm": "3.654", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "263746"} [2024-07-09 02:06:09,109][train_inner][INFO] - {"epoch": 1, "update": 0.623, "loss": "1.369", "ntokens": "126.545", "acc_total": "126.545", "n_correct": "101.99", "wer_total": "126.545", "n_error": "24.535", "ppl": "2.58", "accuracy": "80.596", "wer": "19.388", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "94000", "lr": "0.0004703", "gnorm": "3.972", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "264113"} [2024-07-09 02:12:15,641][train_inner][INFO] - {"epoch": 1, "update": 0.625, "loss": "1.419", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "101.51", "wer_total": "126.625", "n_error": "25.09", "ppl": "2.67", "accuracy": "80.166", "wer": "19.814", "wps": "69.1", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "94200", "lr": "0.00047129", "gnorm": "3.902", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "264480"} [2024-07-09 02:18:22,147][train_inner][INFO] - {"epoch": 1, "update": 0.626, "loss": "1.385", "ntokens": "127.165", "acc_total": "127.165", "n_correct": "102.345", "wer_total": "127.165", "n_error": "24.795", "ppl": "2.61", "accuracy": "80.482", "wer": "19.498", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "94400", "lr": "0.00047228", "gnorm": "3.759", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "264846"} [2024-07-09 02:24:28,998][train_inner][INFO] - {"epoch": 1, "update": 0.627, "loss": "1.324", "ntokens": "126.25", "acc_total": "126.25", "n_correct": "102.49", "wer_total": "126.25", "n_error": "23.745", "ppl": "2.5", "accuracy": "81.18", "wer": "18.808", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "94600", "lr": "0.00047327", "gnorm": "3.633", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "265213"} [2024-07-09 02:30:35,539][train_inner][INFO] - {"epoch": 1, "update": 0.629, "loss": "1.37", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "102.56", "wer_total": "127.09", "n_error": "24.51", "ppl": "2.58", "accuracy": "80.699", "wer": "19.286", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "94800", "lr": "0.00047426", "gnorm": "3.7", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "265580"} [2024-07-09 02:36:41,840][train_inner][INFO] - {"epoch": 1, "update": 0.63, "loss": "1.313", "ntokens": "127.12", "acc_total": "127.12", "n_correct": "102.79", "wer_total": "127.12", "n_error": "24.32", "ppl": "2.49", "accuracy": "80.861", "wer": "19.132", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "95000", "lr": "0.00047525", "gnorm": "3.605", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "265946"} [2024-07-09 02:36:41,842][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 03:18:31,838][valid][INFO] - {"epoch": 1, "valid_loss": "1.136", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.1561", "valid_wer_total": "18.1585", "valid_n_error": "3.00004", "valid_ppl": "2.2", "valid_accuracy": "83.466", "valid_wer": "16.521", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "95000", "valid_best_accuracy": "84.959"} [2024-07-09 03:18:31,839][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 95000 updates [2024-07-09 03:18:31,839][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_95000.pt [2024-07-09 03:18:35,006][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_95000.pt [2024-07-09 03:18:37,028][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_95000.pt (epoch 1 @ 95000 updates, score 83.466) (writing took 5.189465680974536 seconds) [2024-07-09 03:24:43,671][train_inner][INFO] - {"epoch": 1, "update": 0.631, "loss": "1.387", "ntokens": "128.385", "acc_total": "128.385", "n_correct": "103.405", "wer_total": "128.385", "n_error": "24.96", "ppl": "2.62", "accuracy": "80.543", "wer": "19.442", "wps": "8.9", "ups": "0.07", "wpb": "128.4", "bsz": "8", "num_updates": "95200", "lr": "0.00047624", "gnorm": "3.831", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "268828"} [2024-07-09 03:30:50,477][train_inner][INFO] - {"epoch": 1, "update": 0.633, "loss": "1.395", "ntokens": "126.825", "acc_total": "126.825", "n_correct": "102.07", "wer_total": "126.825", "n_error": "24.74", "ppl": "2.63", "accuracy": "80.481", "wer": "19.507", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "95400", "lr": "0.00047723", "gnorm": "3.582", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "269195"} [2024-07-09 03:36:57,062][train_inner][INFO] - {"epoch": 1, "update": 0.634, "loss": "1.428", "ntokens": "126.47", "acc_total": "126.47", "n_correct": "101.06", "wer_total": "126.47", "n_error": "25.39", "ppl": "2.69", "accuracy": "79.908", "wer": "20.076", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "95600", "lr": "0.00047822", "gnorm": "3.798", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "269561"} [2024-07-09 03:43:03,518][train_inner][INFO] - {"epoch": 1, "update": 0.635, "loss": "1.406", "ntokens": "127.4", "acc_total": "127.4", "n_correct": "101.88", "wer_total": "127.4", "n_error": "25.49", "ppl": "2.65", "accuracy": "79.969", "wer": "20.008", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "95800", "lr": "0.00047921", "gnorm": "3.82", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "269928"} [2024-07-09 03:49:10,178][train_inner][INFO] - {"epoch": 1, "update": 0.637, "loss": "1.364", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "102.685", "wer_total": "127.41", "n_error": "24.725", "ppl": "2.57", "accuracy": "80.594", "wer": "19.406", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "96000", "lr": "0.0004802", "gnorm": "3.679", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "270294"} [2024-07-09 03:55:18,075][train_inner][INFO] - {"epoch": 1, "update": 0.638, "loss": "1.363", "ntokens": "126.8", "acc_total": "126.8", "n_correct": "102.685", "wer_total": "126.8", "n_error": "24.095", "ppl": "2.57", "accuracy": "80.982", "wer": "19.002", "wps": "68.9", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "96200", "lr": "0.00048119", "gnorm": "3.659", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "270662"} [2024-07-09 04:01:24,836][train_inner][INFO] - {"epoch": 1, "update": 0.639, "loss": "1.352", "ntokens": "126.235", "acc_total": "126.235", "n_correct": "101.925", "wer_total": "126.235", "n_error": "24.295", "ppl": "2.55", "accuracy": "80.742", "wer": "19.246", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "96400", "lr": "0.00048218", "gnorm": "3.761", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "271029"} [2024-07-09 04:07:31,361][train_inner][INFO] - {"epoch": 1, "update": 0.641, "loss": "1.375", "ntokens": "127.585", "acc_total": "127.585", "n_correct": "103.05", "wer_total": "127.585", "n_error": "24.53", "ppl": "2.59", "accuracy": "80.77", "wer": "19.226", "wps": "69.6", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "96600", "lr": "0.00048317", "gnorm": "3.69", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "271395"} [2024-07-09 04:13:38,195][train_inner][INFO] - {"epoch": 1, "update": 0.642, "loss": "1.372", "ntokens": "126.75", "acc_total": "126.75", "n_correct": "102.09", "wer_total": "126.75", "n_error": "24.635", "ppl": "2.59", "accuracy": "80.544", "wer": "19.436", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "96800", "lr": "0.00048416", "gnorm": "3.573", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "271762"} [2024-07-09 04:19:44,684][train_inner][INFO] - {"epoch": 1, "update": 0.643, "loss": "1.417", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "102.025", "wer_total": "127.335", "n_error": "25.285", "ppl": "2.67", "accuracy": "80.123", "wer": "19.857", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "97000", "lr": "0.00048515", "gnorm": "3.833", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "272129"} [2024-07-09 04:25:51,510][train_inner][INFO] - {"epoch": 1, "update": 0.645, "loss": "1.376", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "103.045", "wer_total": "127.885", "n_error": "24.83", "ppl": "2.6", "accuracy": "80.576", "wer": "19.416", "wps": "69.7", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "97200", "lr": "0.00048614", "gnorm": "3.746", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "272496"} [2024-07-09 04:31:58,235][train_inner][INFO] - {"epoch": 1, "update": 0.646, "loss": "1.384", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "102.075", "wer_total": "127.075", "n_error": "24.985", "ppl": "2.61", "accuracy": "80.327", "wer": "19.662", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "97400", "lr": "0.00048713", "gnorm": "3.97", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "272862"} [2024-07-09 04:35:01,757][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 05:16:51,690][valid][INFO] - {"epoch": 1, "valid_loss": "1.136", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.091", "valid_wer_total": "18.1585", "valid_n_error": "3.0659", "valid_ppl": "2.2", "valid_accuracy": "83.107", "valid_wer": "16.884", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "97500", "valid_best_accuracy": "84.959"} [2024-07-09 05:16:51,691][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 97500 updates [2024-07-09 05:16:51,691][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_97500.pt [2024-07-09 05:16:54,947][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_97500.pt [2024-07-09 05:16:57,062][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_97500.pt (epoch 1 @ 97500 updates, score 83.107) (writing took 5.371392950997688 seconds) [2024-07-09 05:20:00,065][train_inner][INFO] - {"epoch": 1, "update": 0.647, "loss": "1.363", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "102.49", "wer_total": "127.35", "n_error": "24.845", "ppl": "2.57", "accuracy": "80.479", "wer": "19.509", "wps": "8.8", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "97600", "lr": "0.00048812", "gnorm": "3.889", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "275744"} [2024-07-09 05:25:51,854][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-09 05:26:08,362][train_inner][INFO] - {"epoch": 1, "update": 0.649, "loss": "1.363", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "102.145", "wer_total": "126.665", "n_error": "24.5", "ppl": "2.57", "accuracy": "80.642", "wer": "19.342", "wps": "68.8", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "97800", "lr": "0.00048911", "gnorm": "3.611", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "276112"} [2024-07-09 05:32:14,889][train_inner][INFO] - {"epoch": 1, "update": 0.65, "loss": "1.413", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "101.995", "wer_total": "126.92", "n_error": "24.91", "ppl": "2.66", "accuracy": "80.362", "wer": "19.627", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "98000", "lr": "0.0004901", "gnorm": "3.752", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "276479"} [2024-07-09 05:38:24,074][train_inner][INFO] - {"epoch": 1, "update": 0.651, "loss": "1.378", "ntokens": "127.275", "acc_total": "127.275", "n_correct": "102.115", "wer_total": "127.275", "n_error": "25.155", "ppl": "2.6", "accuracy": "80.232", "wer": "19.764", "wps": "68.9", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "98200", "lr": "0.00049109", "gnorm": "3.875", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "276848"} [2024-07-09 05:44:31,205][train_inner][INFO] - {"epoch": 1, "update": 0.653, "loss": "1.384", "ntokens": "127.3", "acc_total": "127.3", "n_correct": "102.565", "wer_total": "127.3", "n_error": "24.73", "ppl": "2.61", "accuracy": "80.57", "wer": "19.427", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "98400", "lr": "0.00049208", "gnorm": "3.989", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "277215"} [2024-07-09 05:50:38,026][train_inner][INFO] - {"epoch": 1, "update": 0.654, "loss": "1.38", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "102.645", "wer_total": "127.155", "n_error": "24.5", "ppl": "2.6", "accuracy": "80.724", "wer": "19.268", "wps": "69.3", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "98600", "lr": "0.00049307", "gnorm": "3.862", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "277582"} [2024-07-09 05:56:44,780][train_inner][INFO] - {"epoch": 1, "update": 0.655, "loss": "1.366", "ntokens": "125.98", "acc_total": "125.98", "n_correct": "101.74", "wer_total": "125.98", "n_error": "24.23", "ppl": "2.58", "accuracy": "80.759", "wer": "19.233", "wps": "68.7", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "98800", "lr": "0.00049406", "gnorm": "3.819", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "277949"} [2024-07-09 06:02:51,276][train_inner][INFO] - {"epoch": 1, "update": 0.657, "loss": "1.387", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "102.265", "wer_total": "127.19", "n_error": "24.905", "ppl": "2.62", "accuracy": "80.403", "wer": "19.581", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "99000", "lr": "0.00049505", "gnorm": "3.681", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "278315"} [2024-07-09 06:08:57,791][train_inner][INFO] - {"epoch": 1, "update": 0.658, "loss": "1.412", "ntokens": "127.955", "acc_total": "127.955", "n_correct": "102.515", "wer_total": "127.955", "n_error": "25.43", "ppl": "2.66", "accuracy": "80.118", "wer": "19.874", "wps": "69.8", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "99200", "lr": "0.00049604", "gnorm": "3.903", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "278682"} [2024-07-09 06:15:04,411][train_inner][INFO] - {"epoch": 1, "update": 0.659, "loss": "1.404", "ntokens": "126.925", "acc_total": "126.925", "n_correct": "101.745", "wer_total": "126.925", "n_error": "25.165", "ppl": "2.65", "accuracy": "80.162", "wer": "19.827", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "99400", "lr": "0.00049703", "gnorm": "3.89", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "279048"} [2024-07-09 06:21:11,419][train_inner][INFO] - {"epoch": 1, "update": 0.66, "loss": "1.337", "ntokens": "127.29", "acc_total": "127.29", "n_correct": "102.825", "wer_total": "127.29", "n_error": "24.435", "ppl": "2.53", "accuracy": "80.78", "wer": "19.196", "wps": "69.4", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "99600", "lr": "0.00049802", "gnorm": "3.783", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "279415"} [2024-07-09 06:27:18,161][train_inner][INFO] - {"epoch": 1, "update": 0.662, "loss": "1.464", "ntokens": "126.365", "acc_total": "126.365", "n_correct": "100.215", "wer_total": "126.365", "n_error": "26.12", "ppl": "2.76", "accuracy": "79.306", "wer": "20.67", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "99800", "lr": "0.00049901", "gnorm": "4.027", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "279782"} [2024-07-09 06:33:24,737][train_inner][INFO] - {"epoch": 1, "update": 0.663, "loss": "1.433", "ntokens": "126.095", "acc_total": "126.095", "n_correct": "100.975", "wer_total": "126.095", "n_error": "25.09", "ppl": "2.7", "accuracy": "80.079", "wer": "19.898", "wps": "68.8", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "100000", "lr": "0.0005", "gnorm": "3.983", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "280149"} [2024-07-09 06:33:24,737][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 07:15:12,811][valid][INFO] - {"epoch": 1, "valid_loss": "1.159", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.1094", "valid_wer_total": "18.1585", "valid_n_error": "3.04769", "valid_ppl": "2.23", "valid_accuracy": "83.208", "valid_wer": "16.784", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "100000", "valid_best_accuracy": "84.959"} [2024-07-09 07:15:12,812][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 100000 updates [2024-07-09 07:15:12,812][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_100000.pt [2024-07-09 07:15:15,972][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_100000.pt [2024-07-09 07:15:18,111][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_100000.pt (epoch 1 @ 100000 updates, score 83.208) (writing took 5.299130220897496 seconds) [2024-07-09 07:21:24,775][train_inner][INFO] - {"epoch": 1, "update": 0.664, "loss": "1.432", "ntokens": "128.64", "acc_total": "128.64", "n_correct": "102.21", "wer_total": "128.64", "n_error": "26.41", "ppl": "2.7", "accuracy": "79.454", "wer": "20.53", "wps": "8.9", "ups": "0.07", "wpb": "128.6", "bsz": "8", "num_updates": "100200", "lr": "0.000498504", "gnorm": "4.029", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "283029"} [2024-07-09 07:27:32,099][train_inner][INFO] - {"epoch": 1, "update": 0.666, "loss": "1.391", "ntokens": "126.69", "acc_total": "126.69", "n_correct": "101.73", "wer_total": "126.69", "n_error": "24.925", "ppl": "2.62", "accuracy": "80.298", "wer": "19.674", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "100400", "lr": "0.000497013", "gnorm": "3.837", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "283396"} [2024-07-09 07:33:40,062][train_inner][INFO] - {"epoch": 1, "update": 0.667, "loss": "1.359", "ntokens": "126.83", "acc_total": "126.83", "n_correct": "102.135", "wer_total": "126.83", "n_error": "24.685", "ppl": "2.56", "accuracy": "80.529", "wer": "19.463", "wps": "68.9", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "100600", "lr": "0.000495527", "gnorm": "3.875", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "283764"} [2024-07-09 07:39:46,699][train_inner][INFO] - {"epoch": 1, "update": 0.668, "loss": "1.384", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "102.075", "wer_total": "126.835", "n_error": "24.73", "ppl": "2.61", "accuracy": "80.479", "wer": "19.498", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "100800", "lr": "0.000494044", "gnorm": "3.878", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "284131"} [2024-07-09 07:45:53,102][train_inner][INFO] - {"epoch": 1, "update": 0.67, "loss": "1.421", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "101.375", "wer_total": "126.945", "n_error": "25.555", "ppl": "2.68", "accuracy": "79.857", "wer": "20.131", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "101000", "lr": "0.000492566", "gnorm": "4.08", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "284497"} [2024-07-09 07:47:50,389][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-09 07:52:01,508][train_inner][INFO] - {"epoch": 1, "update": 0.671, "loss": "1.459", "ntokens": "125.87", "acc_total": "125.87", "n_correct": "100.67", "wer_total": "125.87", "n_error": "25.19", "ppl": "2.75", "accuracy": "79.979", "wer": "20.013", "wps": "68.3", "ups": "0.54", "wpb": "125.9", "bsz": "8", "num_updates": "101200", "lr": "0.000491093", "gnorm": "3.841", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "284866"} [2024-07-09 07:58:08,228][train_inner][INFO] - {"epoch": 1, "update": 0.672, "loss": "1.413", "ntokens": "127.13", "acc_total": "127.13", "n_correct": "102.255", "wer_total": "127.13", "n_error": "24.845", "ppl": "2.66", "accuracy": "80.433", "wer": "19.543", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "101400", "lr": "0.000489624", "gnorm": "3.965", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "285232"} [2024-07-09 08:04:15,157][train_inner][INFO] - {"epoch": 1, "update": 0.674, "loss": "1.385", "ntokens": "126.175", "acc_total": "126.175", "n_correct": "101.155", "wer_total": "126.175", "n_error": "25.02", "ppl": "2.61", "accuracy": "80.17", "wer": "19.83", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "101600", "lr": "0.00048816", "gnorm": "3.754", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "285599"} [2024-07-09 08:05:46,744][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-09 08:10:24,188][train_inner][INFO] - {"epoch": 1, "update": 0.675, "loss": "1.425", "ntokens": "128.2", "acc_total": "128.2", "n_correct": "102.03", "wer_total": "128.2", "n_error": "26.155", "ppl": "2.68", "accuracy": "79.587", "wer": "20.402", "wps": "69.5", "ups": "0.54", "wpb": "128.2", "bsz": "8", "num_updates": "101800", "lr": "0.000486699", "gnorm": "4.124", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "285968"} [2024-07-09 08:16:30,944][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "1.425", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "100.895", "wer_total": "126.18", "n_error": "25.255", "ppl": "2.68", "accuracy": "79.961", "wer": "20.015", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "102000", "lr": "0.000485243", "gnorm": "3.802", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "286335"} [2024-07-09 08:22:37,590][train_inner][INFO] - {"epoch": 1, "update": 0.678, "loss": "1.403", "ntokens": "125.94", "acc_total": "125.94", "n_correct": "101.05", "wer_total": "125.94", "n_error": "24.88", "ppl": "2.64", "accuracy": "80.237", "wer": "19.755", "wps": "68.7", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "102200", "lr": "0.000483792", "gnorm": "4.154", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "286702"} [2024-07-09 08:28:43,879][train_inner][INFO] - {"epoch": 1, "update": 0.679, "loss": "1.362", "ntokens": "127.08", "acc_total": "127.08", "n_correct": "102.675", "wer_total": "127.08", "n_error": "24.375", "ppl": "2.57", "accuracy": "80.796", "wer": "19.181", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "102400", "lr": "0.000482345", "gnorm": "4.025", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "287068"} [2024-07-09 08:31:46,951][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 09:13:33,887][valid][INFO] - {"epoch": 1, "valid_loss": "1.148", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.0855", "valid_wer_total": "18.1585", "valid_n_error": "3.0704", "valid_ppl": "2.22", "valid_accuracy": "83.077", "valid_wer": "16.909", "valid_wps": "173.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "102500", "valid_best_accuracy": "84.959"} [2024-07-09 09:13:33,888][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 102500 updates [2024-07-09 09:13:33,888][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_102500.pt [2024-07-09 09:13:37,195][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_102500.pt [2024-07-09 09:13:39,213][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_102500.pt (epoch 1 @ 102500 updates, score 83.077) (writing took 5.324733322951943 seconds) [2024-07-09 09:16:42,305][train_inner][INFO] - {"epoch": 1, "update": 0.68, "loss": "1.41", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "101.67", "wer_total": "126.82", "n_error": "25.135", "ppl": "2.66", "accuracy": "80.169", "wer": "19.819", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "102600", "lr": "0.000480902", "gnorm": "3.749", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "289946"} [2024-07-09 09:22:48,982][train_inner][INFO] - {"epoch": 1, "update": 0.682, "loss": "1.39", "ntokens": "127.22", "acc_total": "127.22", "n_correct": "101.915", "wer_total": "127.22", "n_error": "25.29", "ppl": "2.62", "accuracy": "80.109", "wer": "19.879", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "102800", "lr": "0.000479464", "gnorm": "4.009", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "290313"} [2024-07-09 09:29:00,751][train_inner][INFO] - {"epoch": 1, "update": 0.683, "loss": "1.398", "ntokens": "126.8", "acc_total": "126.8", "n_correct": "101.235", "wer_total": "126.8", "n_error": "25.55", "ppl": "2.64", "accuracy": "79.838", "wer": "20.15", "wps": "68.2", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "103000", "lr": "0.000478029", "gnorm": "4.119", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "290685"} [2024-07-09 09:35:07,999][train_inner][INFO] - {"epoch": 1, "update": 0.684, "loss": "1.366", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "102.315", "wer_total": "126.805", "n_error": "24.475", "ppl": "2.58", "accuracy": "80.687", "wer": "19.301", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "103200", "lr": "0.000476599", "gnorm": "3.735", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "291052"} [2024-07-09 09:41:14,670][train_inner][INFO] - {"epoch": 1, "update": 0.686, "loss": "1.385", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "102.39", "wer_total": "127.375", "n_error": "24.98", "ppl": "2.61", "accuracy": "80.385", "wer": "19.611", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "103400", "lr": "0.000475174", "gnorm": "3.944", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "291419"} [2024-07-09 09:47:21,633][train_inner][INFO] - {"epoch": 1, "update": 0.687, "loss": "1.414", "ntokens": "127.76", "acc_total": "127.76", "n_correct": "102.245", "wer_total": "127.76", "n_error": "25.505", "ppl": "2.66", "accuracy": "80.029", "wer": "19.963", "wps": "69.6", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "103600", "lr": "0.000473752", "gnorm": "4.067", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "291786"} [2024-07-09 09:53:28,296][train_inner][INFO] - {"epoch": 1, "update": 0.688, "loss": "1.334", "ntokens": "126.83", "acc_total": "126.83", "n_correct": "102.655", "wer_total": "126.83", "n_error": "24.145", "ppl": "2.52", "accuracy": "80.939", "wer": "19.037", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "103800", "lr": "0.000472335", "gnorm": "3.63", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "292152"} [2024-07-09 09:59:35,129][train_inner][INFO] - {"epoch": 1, "update": 0.69, "loss": "1.391", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "101.67", "wer_total": "126.705", "n_error": "25.01", "ppl": "2.62", "accuracy": "80.242", "wer": "19.739", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "104000", "lr": "0.000470922", "gnorm": "3.748", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "292519"} [2024-07-09 10:05:41,818][train_inner][INFO] - {"epoch": 1, "update": 0.691, "loss": "1.376", "ntokens": "127.36", "acc_total": "127.36", "n_correct": "102.61", "wer_total": "127.36", "n_error": "24.75", "ppl": "2.59", "accuracy": "80.567", "wer": "19.433", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "104200", "lr": "0.000469514", "gnorm": "4.131", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "292886"} [2024-07-09 10:11:48,866][train_inner][INFO] - {"epoch": 1, "update": 0.692, "loss": "1.404", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "102.23", "wer_total": "127.515", "n_error": "25.26", "ppl": "2.65", "accuracy": "80.171", "wer": "19.809", "wps": "69.5", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "104400", "lr": "0.000468109", "gnorm": "3.958", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "293253"} [2024-07-09 10:17:55,908][train_inner][INFO] - {"epoch": 1, "update": 0.694, "loss": "1.384", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "101.715", "wer_total": "126.53", "n_error": "24.795", "ppl": "2.61", "accuracy": "80.388", "wer": "19.596", "wps": "68.9", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "104600", "lr": "0.000466709", "gnorm": "3.993", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "293620"} [2024-07-09 10:24:03,016][train_inner][INFO] - {"epoch": 1, "update": 0.695, "loss": "1.422", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "102.195", "wer_total": "126.77", "n_error": "24.57", "ppl": "2.68", "accuracy": "80.614", "wer": "19.382", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "104800", "lr": "0.000465313", "gnorm": "3.947", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "293987"} [2024-07-09 10:30:10,214][train_inner][INFO] - {"epoch": 1, "update": 0.696, "loss": "1.337", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "102.505", "wer_total": "126.625", "n_error": "24.115", "ppl": "2.53", "accuracy": "80.952", "wer": "19.044", "wps": "69", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "105000", "lr": "0.000463921", "gnorm": "3.94", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "294354"} [2024-07-09 10:30:10,214][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 11:12:02,134][valid][INFO] - {"epoch": 1, "valid_loss": "1.108", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2058", "valid_wer_total": "18.1585", "valid_n_error": "2.95069", "valid_ppl": "2.16", "valid_accuracy": "83.739", "valid_wer": "16.25", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "105000", "valid_best_accuracy": "84.959"} [2024-07-09 11:12:02,134][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 105000 updates [2024-07-09 11:12:02,134][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_105000.pt [2024-07-09 11:12:05,363][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_105000.pt [2024-07-09 11:12:07,487][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_105000.pt (epoch 1 @ 105000 updates, score 83.739) (writing took 5.352789517026395 seconds) [2024-07-09 11:18:14,602][train_inner][INFO] - {"epoch": 1, "update": 0.698, "loss": "1.358", "ntokens": "127.085", "acc_total": "127.085", "n_correct": "102.345", "wer_total": "127.085", "n_error": "24.725", "ppl": "2.56", "accuracy": "80.533", "wer": "19.455", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "105200", "lr": "0.000462534", "gnorm": "3.879", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "297239"} [2024-07-09 11:24:22,002][train_inner][INFO] - {"epoch": 1, "update": 0.699, "loss": "1.359", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "102.51", "wer_total": "127.33", "n_error": "24.81", "ppl": "2.57", "accuracy": "80.507", "wer": "19.485", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "105400", "lr": "0.00046115", "gnorm": "3.659", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "297606"} [2024-07-09 11:30:28,909][train_inner][INFO] - {"epoch": 1, "update": 0.7, "loss": "1.381", "ntokens": "125.805", "acc_total": "125.805", "n_correct": "101.865", "wer_total": "125.805", "n_error": "23.93", "ppl": "2.6", "accuracy": "80.971", "wer": "19.022", "wps": "68.6", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "105600", "lr": "0.000459771", "gnorm": "3.914", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "297973"} [2024-07-09 11:36:36,405][train_inner][INFO] - {"epoch": 1, "update": 0.702, "loss": "1.401", "ntokens": "127.005", "acc_total": "127.005", "n_correct": "102.04", "wer_total": "127.005", "n_error": "24.955", "ppl": "2.64", "accuracy": "80.343", "wer": "19.649", "wps": "69.1", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "105800", "lr": "0.000458395", "gnorm": "4.197", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "298340"} [2024-07-09 11:40:33,742][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-09 11:42:45,992][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "1.38", "ntokens": "127.54", "acc_total": "127.54", "n_correct": "102.025", "wer_total": "127.54", "n_error": "25.5", "ppl": "2.6", "accuracy": "79.995", "wer": "19.994", "wps": "69", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "106000", "lr": "0.000457024", "gnorm": "3.793", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "298710"} [2024-07-09 11:48:53,489][train_inner][INFO] - {"epoch": 1, "update": 0.704, "loss": "1.369", "ntokens": "126.165", "acc_total": "126.165", "n_correct": "102.01", "wer_total": "126.165", "n_error": "24.14", "ppl": "2.58", "accuracy": "80.854", "wer": "19.134", "wps": "68.7", "ups": "0.54", "wpb": "126.2", "bsz": "8", "num_updates": "106200", "lr": "0.000455657", "gnorm": "3.711", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "299078"} [2024-07-09 11:55:01,000][train_inner][INFO] - {"epoch": 1, "update": 0.706, "loss": "1.318", "ntokens": "127.75", "acc_total": "127.75", "n_correct": "103.92", "wer_total": "127.75", "n_error": "23.81", "ppl": "2.49", "accuracy": "81.346", "wer": "18.638", "wps": "69.5", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "106400", "lr": "0.000454294", "gnorm": "3.698", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "299445"} [2024-07-09 12:01:08,815][train_inner][INFO] - {"epoch": 1, "update": 0.707, "loss": "1.382", "ntokens": "125.52", "acc_total": "125.52", "n_correct": "100.88", "wer_total": "125.52", "n_error": "24.62", "ppl": "2.61", "accuracy": "80.37", "wer": "19.614", "wps": "68.3", "ups": "0.54", "wpb": "125.5", "bsz": "8", "num_updates": "106600", "lr": "0.000452935", "gnorm": "3.997", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "299813"} [2024-07-09 12:07:16,477][train_inner][INFO] - {"epoch": 1, "update": 0.708, "loss": "1.333", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "103.265", "wer_total": "127.785", "n_error": "24.505", "ppl": "2.52", "accuracy": "80.812", "wer": "19.177", "wps": "69.5", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "106800", "lr": "0.00045158", "gnorm": "3.972", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "300181"} [2024-07-09 12:13:24,347][train_inner][INFO] - {"epoch": 1, "update": 0.71, "loss": "1.385", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "101.65", "wer_total": "126.46", "n_error": "24.79", "ppl": "2.61", "accuracy": "80.381", "wer": "19.603", "wps": "68.8", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "107000", "lr": "0.00045023", "gnorm": "3.866", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "300548"} [2024-07-09 12:19:31,902][train_inner][INFO] - {"epoch": 1, "update": 0.711, "loss": "1.296", "ntokens": "126.88", "acc_total": "126.88", "n_correct": "102.955", "wer_total": "126.88", "n_error": "23.895", "ppl": "2.46", "accuracy": "81.144", "wer": "18.833", "wps": "69", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "107200", "lr": "0.000448883", "gnorm": "3.825", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "300916"} [2024-07-09 12:25:39,949][train_inner][INFO] - {"epoch": 1, "update": 0.712, "loss": "1.323", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "103.345", "wer_total": "127.595", "n_error": "24.24", "ppl": "2.5", "accuracy": "80.995", "wer": "18.998", "wps": "69.3", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "107400", "lr": "0.00044754", "gnorm": "3.441", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "301284"} [2024-07-09 12:28:43,874][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 13:10:41,742][valid][INFO] - {"epoch": 1, "valid_loss": "1.086", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2024", "valid_wer_total": "18.1585", "valid_n_error": "2.95469", "valid_ppl": "2.12", "valid_accuracy": "83.721", "valid_wer": "16.272", "valid_wps": "173", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "107500", "valid_best_accuracy": "84.959"} [2024-07-09 13:10:41,742][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 107500 updates [2024-07-09 13:10:41,743][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_107500.pt [2024-07-09 13:10:44,944][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_107500.pt [2024-07-09 13:10:47,050][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_107500.pt (epoch 1 @ 107500 updates, score 83.721) (writing took 5.307070772047155 seconds) [2024-07-09 13:13:51,005][train_inner][INFO] - {"epoch": 1, "update": 0.714, "loss": "1.284", "ntokens": "127.545", "acc_total": "127.545", "n_correct": "104.28", "wer_total": "127.545", "n_error": "23.255", "ppl": "2.44", "accuracy": "81.759", "wer": "18.233", "wps": "8.8", "ups": "0.07", "wpb": "127.5", "bsz": "8", "num_updates": "107600", "lr": "0.000446201", "gnorm": "3.617", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "304175"} [2024-07-09 13:19:59,285][train_inner][INFO] - {"epoch": 1, "update": 0.715, "loss": "1.333", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "102.765", "wer_total": "127.135", "n_error": "24.35", "ppl": "2.52", "accuracy": "80.831", "wer": "19.153", "wps": "69", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "107800", "lr": "0.000444867", "gnorm": "3.732", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "304543"} [2024-07-09 13:26:07,324][train_inner][INFO] - {"epoch": 1, "update": 0.716, "loss": "1.32", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "103.245", "wer_total": "127.2", "n_error": "23.95", "ppl": "2.5", "accuracy": "81.167", "wer": "18.829", "wps": "69.1", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "108000", "lr": "0.000443536", "gnorm": "3.522", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "304911"} [2024-07-09 13:32:15,745][train_inner][INFO] - {"epoch": 1, "update": 0.718, "loss": "1.331", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "103.075", "wer_total": "126.875", "n_error": "23.77", "ppl": "2.52", "accuracy": "81.241", "wer": "18.735", "wps": "68.9", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "108200", "lr": "0.000442209", "gnorm": "3.581", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "305280"} [2024-07-09 13:38:24,348][train_inner][INFO] - {"epoch": 1, "update": 0.719, "loss": "1.352", "ntokens": "126.965", "acc_total": "126.965", "n_correct": "102.16", "wer_total": "126.965", "n_error": "24.795", "ppl": "2.55", "accuracy": "80.463", "wer": "19.529", "wps": "68.9", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "108400", "lr": "0.000440886", "gnorm": "3.649", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "305648"} [2024-07-09 13:44:32,954][train_inner][INFO] - {"epoch": 1, "update": 0.72, "loss": "1.379", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "102.655", "wer_total": "127.055", "n_error": "24.375", "ppl": "2.6", "accuracy": "80.796", "wer": "19.185", "wps": "68.9", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "108600", "lr": "0.000439568", "gnorm": "3.895", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "306017"} [2024-07-09 13:50:41,285][train_inner][INFO] - {"epoch": 1, "update": 0.722, "loss": "1.269", "ntokens": "127.305", "acc_total": "127.305", "n_correct": "103.98", "wer_total": "127.305", "n_error": "23.31", "ppl": "2.41", "accuracy": "81.678", "wer": "18.31", "wps": "69.1", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "108800", "lr": "0.000438253", "gnorm": "3.54", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "306385"} [2024-07-09 13:56:49,736][train_inner][INFO] - {"epoch": 1, "update": 0.723, "loss": "1.351", "ntokens": "127.005", "acc_total": "127.005", "n_correct": "102.765", "wer_total": "127.005", "n_error": "24.24", "ppl": "2.55", "accuracy": "80.914", "wer": "19.086", "wps": "68.9", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "109000", "lr": "0.000436942", "gnorm": "3.723", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "306754"} [2024-07-09 14:02:58,228][train_inner][INFO] - {"epoch": 1, "update": 0.724, "loss": "1.324", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "102.49", "wer_total": "126.915", "n_error": "24.41", "ppl": "2.5", "accuracy": "80.755", "wer": "19.233", "wps": "68.9", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "109200", "lr": "0.000435635", "gnorm": "3.589", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "307122"} [2024-07-09 14:09:07,068][train_inner][INFO] - {"epoch": 1, "update": 0.725, "loss": "1.353", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "102.295", "wer_total": "126.325", "n_error": "24.015", "ppl": "2.56", "accuracy": "80.978", "wer": "19.01", "wps": "68.5", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "109400", "lr": "0.000434332", "gnorm": "3.663", "loss_scale": "1024", "train_wall": "368", "gb_free": "6.5", "wall": "307491"} [2024-07-09 14:12:33,524][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-09 14:15:17,481][train_inner][INFO] - {"epoch": 1, "update": 0.727, "loss": "1.368", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "102.535", "wer_total": "126.93", "n_error": "24.39", "ppl": "2.58", "accuracy": "80.781", "wer": "19.215", "wps": "68.5", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "109600", "lr": "0.000433033", "gnorm": "3.795", "loss_scale": "512", "train_wall": "370", "gb_free": "6.5", "wall": "307862"} [2024-07-09 14:21:25,859][train_inner][INFO] - {"epoch": 1, "update": 0.728, "loss": "1.368", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "102.385", "wer_total": "126.515", "n_error": "24.125", "ppl": "2.58", "accuracy": "80.927", "wer": "19.069", "wps": "68.7", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "109800", "lr": "0.000431737", "gnorm": "3.792", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "308230"} [2024-07-09 14:27:34,554][train_inner][INFO] - {"epoch": 1, "update": 0.729, "loss": "1.267", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "103.835", "wer_total": "127.075", "n_error": "23.24", "ppl": "2.41", "accuracy": "81.712", "wer": "18.288", "wps": "68.9", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "110000", "lr": "0.000430446", "gnorm": "3.429", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "308599"} [2024-07-09 14:27:34,555][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 15:09:55,046][valid][INFO] - {"epoch": 1, "valid_loss": "1.071", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.221", "valid_wer_total": "18.1585", "valid_n_error": "2.93593", "valid_ppl": "2.1", "valid_accuracy": "83.823", "valid_wer": "16.168", "valid_wps": "171.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "110000", "valid_best_accuracy": "84.959"} [2024-07-09 15:09:55,046][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 110000 updates [2024-07-09 15:09:55,047][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_110000.pt [2024-07-09 15:09:58,245][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_110000.pt [2024-07-09 15:10:00,401][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_110000.pt (epoch 1 @ 110000 updates, score 83.823) (writing took 5.354602115927264 seconds) [2024-07-09 15:16:08,671][train_inner][INFO] - {"epoch": 1, "update": 0.731, "loss": "1.307", "ntokens": "126.38", "acc_total": "126.38", "n_correct": "102.74", "wer_total": "126.38", "n_error": "23.62", "ppl": "2.47", "accuracy": "81.295", "wer": "18.69", "wps": "8.7", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "110200", "lr": "0.000429158", "gnorm": "3.446", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "311513"} [2024-07-09 15:22:16,726][train_inner][INFO] - {"epoch": 1, "update": 0.732, "loss": "1.321", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "102.525", "wer_total": "126.31", "n_error": "23.76", "ppl": "2.5", "accuracy": "81.169", "wer": "18.811", "wps": "68.6", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "110400", "lr": "0.000427875", "gnorm": "3.598", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "311881"} [2024-07-09 15:28:24,855][train_inner][INFO] - {"epoch": 1, "update": 0.733, "loss": "1.291", "ntokens": "126.645", "acc_total": "126.645", "n_correct": "103.49", "wer_total": "126.645", "n_error": "23.13", "ppl": "2.45", "accuracy": "81.717", "wer": "18.264", "wps": "68.8", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "110600", "lr": "0.000426595", "gnorm": "3.481", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "312249"} [2024-07-09 15:34:32,577][train_inner][INFO] - {"epoch": 1, "update": 0.735, "loss": "1.303", "ntokens": "126.71", "acc_total": "126.71", "n_correct": "103.395", "wer_total": "126.71", "n_error": "23.295", "ppl": "2.47", "accuracy": "81.6", "wer": "18.385", "wps": "68.9", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "110800", "lr": "0.000425319", "gnorm": "3.323", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "312617"} [2024-07-09 15:40:40,801][train_inner][INFO] - {"epoch": 1, "update": 0.736, "loss": "1.363", "ntokens": "126.575", "acc_total": "126.575", "n_correct": "102.75", "wer_total": "126.575", "n_error": "23.815", "ppl": "2.57", "accuracy": "81.177", "wer": "18.815", "wps": "68.7", "ups": "0.54", "wpb": "126.6", "bsz": "8", "num_updates": "111000", "lr": "0.000424046", "gnorm": "3.655", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "312985"} [2024-07-09 15:46:48,792][train_inner][INFO] - {"epoch": 1, "update": 0.737, "loss": "1.288", "ntokens": "126.655", "acc_total": "126.655", "n_correct": "103.57", "wer_total": "126.655", "n_error": "23.085", "ppl": "2.44", "accuracy": "81.773", "wer": "18.227", "wps": "68.8", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "111200", "lr": "0.000422778", "gnorm": "3.482", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "313353"} [2024-07-09 15:52:56,493][train_inner][INFO] - {"epoch": 1, "update": 0.739, "loss": "1.296", "ntokens": "127.39", "acc_total": "127.39", "n_correct": "103.46", "wer_total": "127.39", "n_error": "23.9", "ppl": "2.46", "accuracy": "81.215", "wer": "18.761", "wps": "69.3", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "111400", "lr": "0.000421513", "gnorm": "3.53", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "313721"} [2024-07-09 15:59:04,433][train_inner][INFO] - {"epoch": 1, "update": 0.74, "loss": "1.289", "ntokens": "127.25", "acc_total": "127.25", "n_correct": "104.07", "wer_total": "127.25", "n_error": "23.165", "ppl": "2.44", "accuracy": "81.784", "wer": "18.204", "wps": "69.2", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "111600", "lr": "0.000420252", "gnorm": "3.583", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "314088"} [2024-07-09 16:05:12,108][train_inner][INFO] - {"epoch": 1, "update": 0.741, "loss": "1.256", "ntokens": "127.495", "acc_total": "127.495", "n_correct": "104.38", "wer_total": "127.495", "n_error": "23.105", "ppl": "2.39", "accuracy": "81.87", "wer": "18.122", "wps": "69.4", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "111800", "lr": "0.000418995", "gnorm": "3.827", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "314456"} [2024-07-09 16:11:19,737][train_inner][INFO] - {"epoch": 1, "update": 0.743, "loss": "1.32", "ntokens": "125.895", "acc_total": "125.895", "n_correct": "102.115", "wer_total": "125.895", "n_error": "23.775", "ppl": "2.5", "accuracy": "81.111", "wer": "18.885", "wps": "68.5", "ups": "0.54", "wpb": "125.9", "bsz": "8", "num_updates": "112000", "lr": "0.000417742", "gnorm": "3.61", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "314824"} [2024-07-09 16:17:27,368][train_inner][INFO] - {"epoch": 1, "update": 0.744, "loss": "1.298", "ntokens": "126.96", "acc_total": "126.96", "n_correct": "103.615", "wer_total": "126.96", "n_error": "23.34", "ppl": "2.46", "accuracy": "81.612", "wer": "18.384", "wps": "69.1", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "112200", "lr": "0.000416492", "gnorm": "3.689", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "315191"} [2024-07-09 16:23:35,094][train_inner][INFO] - {"epoch": 1, "update": 0.745, "loss": "1.29", "ntokens": "127.715", "acc_total": "127.715", "n_correct": "103.9", "wer_total": "127.715", "n_error": "23.815", "ppl": "2.45", "accuracy": "81.353", "wer": "18.647", "wps": "69.5", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "112400", "lr": "0.000415247", "gnorm": "3.46", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "315559"} [2024-07-09 16:26:38,911][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 17:08:33,844][valid][INFO] - {"epoch": 1, "valid_loss": "1.066", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3087", "valid_wer_total": "18.1585", "valid_n_error": "2.84789", "valid_ppl": "2.09", "valid_accuracy": "84.306", "valid_wer": "15.684", "valid_wps": "173.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "112500", "valid_best_accuracy": "84.959"} [2024-07-09 17:08:33,845][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 112500 updates [2024-07-09 17:08:33,845][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_112500.pt [2024-07-09 17:08:37,191][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_112500.pt [2024-07-09 17:08:39,336][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_112500.pt (epoch 1 @ 112500 updates, score 84.306) (writing took 5.491568648954853 seconds) [2024-07-09 17:08:59,359][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-09 17:11:44,301][train_inner][INFO] - {"epoch": 1, "update": 0.747, "loss": "1.316", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "103.065", "wer_total": "126.975", "n_error": "23.895", "ppl": "2.49", "accuracy": "81.17", "wer": "18.819", "wps": "8.8", "ups": "0.07", "wpb": "127", "bsz": "8", "num_updates": "112600", "lr": "0.000414005", "gnorm": "3.636", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "318448"} [2024-07-09 17:17:51,430][train_inner][INFO] - {"epoch": 1, "update": 0.748, "loss": "1.312", "ntokens": "125.79", "acc_total": "125.79", "n_correct": "102.015", "wer_total": "125.79", "n_error": "23.76", "ppl": "2.48", "accuracy": "81.099", "wer": "18.889", "wps": "68.5", "ups": "0.54", "wpb": "125.8", "bsz": "8", "num_updates": "112800", "lr": "0.000412766", "gnorm": "3.966", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "318815"} [2024-07-09 17:23:58,390][train_inner][INFO] - {"epoch": 1, "update": 0.749, "loss": "1.298", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "103.485", "wer_total": "126.725", "n_error": "23.225", "ppl": "2.46", "accuracy": "81.661", "wer": "18.327", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "113000", "lr": "0.000411531", "gnorm": "3.628", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "319182"} [2024-07-09 17:30:05,435][train_inner][INFO] - {"epoch": 1, "update": 0.751, "loss": "1.291", "ntokens": "127.545", "acc_total": "127.545", "n_correct": "104.285", "wer_total": "127.545", "n_error": "23.235", "ppl": "2.45", "accuracy": "81.763", "wer": "18.217", "wps": "69.5", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "113200", "lr": "0.0004103", "gnorm": "3.479", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "319549"} [2024-07-09 17:36:12,356][train_inner][INFO] - {"epoch": 1, "update": 0.752, "loss": "1.283", "ntokens": "126.315", "acc_total": "126.315", "n_correct": "103.195", "wer_total": "126.315", "n_error": "23.115", "ppl": "2.43", "accuracy": "81.697", "wer": "18.299", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "113400", "lr": "0.000409073", "gnorm": "3.502", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "319916"} [2024-07-09 17:42:19,327][train_inner][INFO] - {"epoch": 1, "update": 0.753, "loss": "1.289", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "104.025", "wer_total": "127.425", "n_error": "23.4", "ppl": "2.44", "accuracy": "81.636", "wer": "18.364", "wps": "69.4", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "113600", "lr": "0.00040785", "gnorm": "3.656", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "320283"} [2024-07-09 17:48:26,466][train_inner][INFO] - {"epoch": 1, "update": 0.755, "loss": "1.238", "ntokens": "126.83", "acc_total": "126.83", "n_correct": "104.42", "wer_total": "126.83", "n_error": "22.375", "ppl": "2.36", "accuracy": "82.331", "wer": "17.642", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "113800", "lr": "0.00040663", "gnorm": "3.679", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "320651"} [2024-07-09 17:54:35,924][train_inner][INFO] - {"epoch": 1, "update": 0.756, "loss": "1.254", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "104.33", "wer_total": "127.32", "n_error": "22.98", "ppl": "2.39", "accuracy": "81.943", "wer": "18.049", "wps": "68.9", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "114000", "lr": "0.000405413", "gnorm": "3.41", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "321020"} [2024-07-09 18:00:44,052][train_inner][INFO] - {"epoch": 1, "update": 0.757, "loss": "1.291", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "103.585", "wer_total": "127.06", "n_error": "23.47", "ppl": "2.45", "accuracy": "81.524", "wer": "18.472", "wps": "69", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "114200", "lr": "0.000404201", "gnorm": "3.758", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "321388"} [2024-07-09 18:06:50,838][train_inner][INFO] - {"epoch": 1, "update": 0.759, "loss": "1.301", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "102.86", "wer_total": "126.27", "n_error": "23.4", "ppl": "2.46", "accuracy": "81.46", "wer": "18.532", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "114400", "lr": "0.000402991", "gnorm": "3.515", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "321755"} [2024-07-09 18:12:57,881][train_inner][INFO] - {"epoch": 1, "update": 0.76, "loss": "1.274", "ntokens": "127.285", "acc_total": "127.285", "n_correct": "103.99", "wer_total": "127.285", "n_error": "23.27", "ppl": "2.42", "accuracy": "81.699", "wer": "18.282", "wps": "69.4", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "114600", "lr": "0.000401786", "gnorm": "3.703", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "322122"} [2024-07-09 18:19:04,665][train_inner][INFO] - {"epoch": 1, "update": 0.761, "loss": "1.287", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "104.09", "wer_total": "127.235", "n_error": "23.13", "ppl": "2.44", "accuracy": "81.809", "wer": "18.179", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "114800", "lr": "0.000400584", "gnorm": "3.639", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "322489"} [2024-07-09 18:25:12,006][train_inner][INFO] - {"epoch": 1, "update": 0.763, "loss": "1.273", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "104.19", "wer_total": "127.19", "n_error": "22.975", "ppl": "2.42", "accuracy": "81.917", "wer": "18.064", "wps": "69.2", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "115000", "lr": "0.000399386", "gnorm": "3.578", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "322856"} [2024-07-09 18:25:12,007][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 19:07:27,205][valid][INFO] - {"epoch": 1, "valid_loss": "1.052", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3011", "valid_wer_total": "18.1585", "valid_n_error": "2.85602", "valid_ppl": "2.07", "valid_accuracy": "84.264", "valid_wer": "15.728", "valid_wps": "171.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "115000", "valid_best_accuracy": "84.959"} [2024-07-09 19:07:27,206][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 115000 updates [2024-07-09 19:07:27,206][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_115000.pt [2024-07-09 19:07:30,395][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_115000.pt [2024-07-09 19:07:32,433][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_115000.pt (epoch 1 @ 115000 updates, score 84.264) (writing took 5.227503948030062 seconds) [2024-07-09 19:13:39,050][train_inner][INFO] - {"epoch": 1, "update": 0.764, "loss": "1.291", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "103.07", "wer_total": "126.18", "n_error": "23.1", "ppl": "2.45", "accuracy": "81.685", "wer": "18.307", "wps": "8.7", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "115200", "lr": "0.000398191", "gnorm": "3.706", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "325763"} [2024-07-09 19:17:57,808][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-09 19:19:48,016][train_inner][INFO] - {"epoch": 1, "update": 0.765, "loss": "1.284", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "103.77", "wer_total": "126.515", "n_error": "22.73", "ppl": "2.43", "accuracy": "82.022", "wer": "17.966", "wps": "68.6", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "115400", "lr": "0.000397", "gnorm": "3.693", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "326132"} [2024-07-09 19:25:55,062][train_inner][INFO] - {"epoch": 1, "update": 0.767, "loss": "1.237", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "104.62", "wer_total": "127.095", "n_error": "22.445", "ppl": "2.36", "accuracy": "82.316", "wer": "17.66", "wps": "69.3", "ups": "0.54", "wpb": "127.1", "bsz": "8", "num_updates": "115600", "lr": "0.000395813", "gnorm": "3.547", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "326499"} [2024-07-09 19:32:01,955][train_inner][INFO] - {"epoch": 1, "update": 0.768, "loss": "1.272", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "103.84", "wer_total": "126.725", "n_error": "22.84", "ppl": "2.41", "accuracy": "81.941", "wer": "18.023", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "115800", "lr": "0.000394629", "gnorm": "3.458", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "326866"} [2024-07-09 19:38:08,960][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "1.264", "ntokens": "127.46", "acc_total": "127.46", "n_correct": "104.19", "wer_total": "127.46", "n_error": "23.265", "ppl": "2.4", "accuracy": "81.743", "wer": "18.253", "wps": "69.5", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "116000", "lr": "0.000393448", "gnorm": "3.806", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "327233"} [2024-07-09 19:44:15,473][train_inner][INFO] - {"epoch": 1, "update": 0.771, "loss": "1.275", "ntokens": "125.78", "acc_total": "125.78", "n_correct": "102.88", "wer_total": "125.78", "n_error": "22.885", "ppl": "2.42", "accuracy": "81.794", "wer": "18.194", "wps": "68.6", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "116200", "lr": "0.000392271", "gnorm": "3.684", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "327600"} [2024-07-09 19:50:21,985][train_inner][INFO] - {"epoch": 1, "update": 0.772, "loss": "1.266", "ntokens": "127.07", "acc_total": "127.07", "n_correct": "103.365", "wer_total": "127.07", "n_error": "23.685", "ppl": "2.4", "accuracy": "81.345", "wer": "18.639", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "116400", "lr": "0.000391098", "gnorm": "3.544", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "327966"} [2024-07-09 19:56:28,658][train_inner][INFO] - {"epoch": 1, "update": 0.773, "loss": "1.252", "ntokens": "126.52", "acc_total": "126.52", "n_correct": "103.965", "wer_total": "126.52", "n_error": "22.535", "ppl": "2.38", "accuracy": "82.173", "wer": "17.811", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "116600", "lr": "0.000389928", "gnorm": "3.512", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "328333"} [2024-07-09 20:02:35,537][train_inner][INFO] - {"epoch": 1, "update": 0.775, "loss": "1.234", "ntokens": "126.495", "acc_total": "126.495", "n_correct": "104.47", "wer_total": "126.495", "n_error": "22.015", "ppl": "2.35", "accuracy": "82.588", "wer": "17.404", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "116800", "lr": "0.000388762", "gnorm": "3.821", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "328700"} [2024-07-09 20:08:42,359][train_inner][INFO] - {"epoch": 1, "update": 0.776, "loss": "1.286", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "104.06", "wer_total": "127.42", "n_error": "23.355", "ppl": "2.44", "accuracy": "81.667", "wer": "18.329", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "117000", "lr": "0.000387599", "gnorm": "3.704", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "329066"} [2024-07-09 20:14:48,945][train_inner][INFO] - {"epoch": 1, "update": 0.777, "loss": "1.25", "ntokens": "127.02", "acc_total": "127.02", "n_correct": "104.93", "wer_total": "127.02", "n_error": "22.085", "ppl": "2.38", "accuracy": "82.609", "wer": "17.387", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "117200", "lr": "0.000386439", "gnorm": "3.533", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "329433"} [2024-07-09 20:20:56,096][train_inner][INFO] - {"epoch": 1, "update": 0.779, "loss": "1.26", "ntokens": "126.79", "acc_total": "126.79", "n_correct": "104.415", "wer_total": "126.79", "n_error": "22.36", "ppl": "2.39", "accuracy": "82.353", "wer": "17.635", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "117400", "lr": "0.000385283", "gnorm": "3.561", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "329800"} [2024-07-09 20:23:59,651][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 21:05:52,844][valid][INFO] - {"epoch": 1, "valid_loss": "1.054", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.2923", "valid_wer_total": "18.1585", "valid_n_error": "2.86486", "valid_ppl": "2.08", "valid_accuracy": "84.216", "valid_wer": "15.777", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "117500", "valid_best_accuracy": "84.959"} [2024-07-09 21:05:52,844][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 117500 updates [2024-07-09 21:05:52,845][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_117500.pt [2024-07-09 21:05:56,047][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_117500.pt [2024-07-09 21:05:58,054][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_117500.pt (epoch 1 @ 117500 updates, score 84.216) (writing took 5.209692226024345 seconds) [2024-07-09 21:09:01,360][train_inner][INFO] - {"epoch": 1, "update": 0.78, "loss": "1.246", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "103.985", "wer_total": "126.875", "n_error": "22.885", "ppl": "2.37", "accuracy": "81.959", "wer": "18.037", "wps": "8.8", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "117600", "lr": "0.000384131", "gnorm": "3.632", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "332685"} [2024-07-09 21:15:08,060][train_inner][INFO] - {"epoch": 1, "update": 0.781, "loss": "1.286", "ntokens": "127.205", "acc_total": "127.205", "n_correct": "104.1", "wer_total": "127.205", "n_error": "23.095", "ppl": "2.44", "accuracy": "81.836", "wer": "18.156", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "117800", "lr": "0.000382982", "gnorm": "3.374", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "333052"} [2024-07-09 21:21:14,912][train_inner][INFO] - {"epoch": 1, "update": 0.783, "loss": "1.258", "ntokens": "126.69", "acc_total": "126.69", "n_correct": "104.07", "wer_total": "126.69", "n_error": "22.61", "ppl": "2.39", "accuracy": "82.145", "wer": "17.847", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "118000", "lr": "0.000381836", "gnorm": "3.329", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "333419"} [2024-07-09 21:27:22,066][train_inner][INFO] - {"epoch": 1, "update": 0.784, "loss": "1.232", "ntokens": "129.025", "acc_total": "129.025", "n_correct": "106.49", "wer_total": "129.025", "n_error": "22.53", "ppl": "2.35", "accuracy": "82.534", "wer": "17.462", "wps": "70.3", "ups": "0.54", "wpb": "129", "bsz": "8", "num_updates": "118200", "lr": "0.000380694", "gnorm": "3.55", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "333786"} [2024-07-09 21:33:28,600][train_inner][INFO] - {"epoch": 1, "update": 0.785, "loss": "1.244", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "104.28", "wer_total": "127.245", "n_error": "22.95", "ppl": "2.37", "accuracy": "81.952", "wer": "18.036", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "118400", "lr": "0.000379555", "gnorm": "3.781", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "334153"} [2024-07-09 21:39:35,352][train_inner][INFO] - {"epoch": 1, "update": 0.786, "loss": "1.252", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "104.54", "wer_total": "126.815", "n_error": "22.27", "ppl": "2.38", "accuracy": "82.435", "wer": "17.561", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "118600", "lr": "0.00037842", "gnorm": "3.556", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "334519"} [2024-07-09 21:45:42,176][train_inner][INFO] - {"epoch": 1, "update": 0.788, "loss": "1.28", "ntokens": "126.4", "acc_total": "126.4", "n_correct": "103.295", "wer_total": "126.4", "n_error": "23.075", "ppl": "2.43", "accuracy": "81.721", "wer": "18.256", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "118800", "lr": "0.000377288", "gnorm": "3.722", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "334886"} [2024-07-09 21:48:08,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-09 21:51:50,733][train_inner][INFO] - {"epoch": 1, "update": 0.789, "loss": "1.269", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "103.09", "wer_total": "126.27", "n_error": "23.16", "ppl": "2.41", "accuracy": "81.643", "wer": "18.342", "wps": "68.5", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "119000", "lr": "0.00037616", "gnorm": "3.944", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "335255"} [2024-07-09 21:57:57,721][train_inner][INFO] - {"epoch": 1, "update": 0.79, "loss": "1.282", "ntokens": "126.73", "acc_total": "126.73", "n_correct": "103.57", "wer_total": "126.73", "n_error": "23.145", "ppl": "2.43", "accuracy": "81.725", "wer": "18.263", "wps": "69.1", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "119200", "lr": "0.000375034", "gnorm": "3.625", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "335622"} [2024-07-09 22:04:04,649][train_inner][INFO] - {"epoch": 1, "update": 0.792, "loss": "1.258", "ntokens": "125.945", "acc_total": "125.945", "n_correct": "103.355", "wer_total": "125.945", "n_error": "22.57", "ppl": "2.39", "accuracy": "82.064", "wer": "17.921", "wps": "68.6", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "119400", "lr": "0.000373913", "gnorm": "3.379", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "335989"} [2024-07-09 22:10:11,034][train_inner][INFO] - {"epoch": 1, "update": 0.793, "loss": "1.26", "ntokens": "125.77", "acc_total": "125.77", "n_correct": "102.955", "wer_total": "125.77", "n_error": "22.79", "ppl": "2.4", "accuracy": "81.86", "wer": "18.12", "wps": "68.7", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "119600", "lr": "0.000372794", "gnorm": "3.376", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "336355"} [2024-07-09 22:16:17,521][train_inner][INFO] - {"epoch": 1, "update": 0.794, "loss": "1.165", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "105.005", "wer_total": "126.76", "n_error": "21.75", "ppl": "2.24", "accuracy": "82.838", "wer": "17.158", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "119800", "lr": "0.000371679", "gnorm": "3.207", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "336722"} [2024-07-09 22:22:24,146][train_inner][INFO] - {"epoch": 1, "update": 0.796, "loss": "1.213", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "104.545", "wer_total": "127.025", "n_error": "22.46", "ppl": "2.32", "accuracy": "82.303", "wer": "17.682", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "120000", "lr": "0.000370567", "gnorm": "3.548", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "337088"} [2024-07-09 22:22:24,147][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-09 23:04:15,741][valid][INFO] - {"epoch": 1, "valid_loss": "1.038", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3411", "valid_wer_total": "18.1585", "valid_n_error": "2.81609", "valid_ppl": "2.05", "valid_accuracy": "84.485", "valid_wer": "15.508", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "120000", "valid_best_accuracy": "84.959"} [2024-07-09 23:04:15,741][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 120000 updates [2024-07-09 23:04:15,742][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_120000.pt [2024-07-09 23:04:18,940][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_120000.pt [2024-07-09 23:04:21,114][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_120000.pt (epoch 1 @ 120000 updates, score 84.485) (writing took 5.3723500510677695 seconds) [2024-07-09 23:10:27,568][train_inner][INFO] - {"epoch": 1, "update": 0.797, "loss": "1.24", "ntokens": "127.5", "acc_total": "127.5", "n_correct": "104.67", "wer_total": "127.5", "n_error": "22.82", "ppl": "2.36", "accuracy": "82.094", "wer": "17.898", "wps": "8.8", "ups": "0.07", "wpb": "127.5", "bsz": "8", "num_updates": "120200", "lr": "0.000369459", "gnorm": "3.733", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "339972"} [2024-07-09 23:16:34,407][train_inner][INFO] - {"epoch": 1, "update": 0.798, "loss": "1.235", "ntokens": "126.825", "acc_total": "126.825", "n_correct": "104.235", "wer_total": "126.825", "n_error": "22.57", "ppl": "2.35", "accuracy": "82.188", "wer": "17.796", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "120400", "lr": "0.000368354", "gnorm": "3.572", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "340338"} [2024-07-09 23:22:41,101][train_inner][INFO] - {"epoch": 1, "update": 0.8, "loss": "1.268", "ntokens": "126.11", "acc_total": "126.11", "n_correct": "103.38", "wer_total": "126.11", "n_error": "22.72", "ppl": "2.41", "accuracy": "81.976", "wer": "18.016", "wps": "68.8", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "120600", "lr": "0.000367252", "gnorm": "3.527", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "340705"} [2024-07-09 23:28:47,464][train_inner][INFO] - {"epoch": 1, "update": 0.801, "loss": "1.27", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "104.24", "wer_total": "127.11", "n_error": "22.85", "ppl": "2.41", "accuracy": "82.008", "wer": "17.977", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "120800", "lr": "0.000366153", "gnorm": "3.689", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "341072"} [2024-07-09 23:34:54,961][train_inner][INFO] - {"epoch": 1, "update": 0.802, "loss": "1.245", "ntokens": "127.6", "acc_total": "127.6", "n_correct": "104.69", "wer_total": "127.6", "n_error": "22.91", "ppl": "2.37", "accuracy": "82.045", "wer": "17.955", "wps": "69.4", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "121000", "lr": "0.000365058", "gnorm": "3.431", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "341439"} [2024-07-09 23:41:01,459][train_inner][INFO] - {"epoch": 1, "update": 0.804, "loss": "1.296", "ntokens": "126.34", "acc_total": "126.34", "n_correct": "102.955", "wer_total": "126.34", "n_error": "23.38", "ppl": "2.46", "accuracy": "81.49", "wer": "18.506", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "121200", "lr": "0.000363966", "gnorm": "3.685", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "341806"} [2024-07-09 23:47:07,769][train_inner][INFO] - {"epoch": 1, "update": 0.805, "loss": "1.272", "ntokens": "126.345", "acc_total": "126.345", "n_correct": "103.025", "wer_total": "126.345", "n_error": "23.3", "ppl": "2.42", "accuracy": "81.543", "wer": "18.442", "wps": "69", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "121400", "lr": "0.000362877", "gnorm": "3.66", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "342172"} [2024-07-09 23:53:14,094][train_inner][INFO] - {"epoch": 1, "update": 0.806, "loss": "1.25", "ntokens": "125.905", "acc_total": "125.905", "n_correct": "103.34", "wer_total": "125.905", "n_error": "22.55", "ppl": "2.38", "accuracy": "82.078", "wer": "17.91", "wps": "68.7", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "121600", "lr": "0.000361792", "gnorm": "3.657", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "342538"} [2024-07-09 23:59:20,431][train_inner][INFO] - {"epoch": 1, "update": 0.808, "loss": "1.252", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "104.265", "wer_total": "126.665", "n_error": "22.375", "ppl": "2.38", "accuracy": "82.316", "wer": "17.665", "wps": "69.2", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "121800", "lr": "0.00036071", "gnorm": "3.369", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "342904"} [2024-07-10 00:05:26,973][train_inner][INFO] - {"epoch": 1, "update": 0.809, "loss": "1.195", "ntokens": "126.12", "acc_total": "126.12", "n_correct": "104.29", "wer_total": "126.12", "n_error": "21.825", "ppl": "2.29", "accuracy": "82.691", "wer": "17.305", "wps": "68.8", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "122000", "lr": "0.000359631", "gnorm": "3.598", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "343271"} [2024-07-10 00:06:38,461][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-10 00:11:35,339][train_inner][INFO] - {"epoch": 1, "update": 0.81, "loss": "1.272", "ntokens": "126.345", "acc_total": "126.345", "n_correct": "103.16", "wer_total": "126.345", "n_error": "23.15", "ppl": "2.41", "accuracy": "81.649", "wer": "18.323", "wps": "68.6", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "122200", "lr": "0.000358555", "gnorm": "3.729", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "343639"} [2024-07-10 00:17:43,693][train_inner][INFO] - {"epoch": 1, "update": 0.812, "loss": "1.239", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "104.01", "wer_total": "126.905", "n_error": "22.89", "ppl": "2.36", "accuracy": "81.959", "wer": "18.037", "wps": "68.9", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "122400", "lr": "0.000357482", "gnorm": "3.603", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "344008"} [2024-07-10 00:20:47,027][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 01:02:34,472][valid][INFO] - {"epoch": 1, "valid_loss": "1", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.3596", "valid_wer_total": "18.1585", "valid_n_error": "2.79758", "valid_ppl": "2", "valid_accuracy": "84.586", "valid_wer": "15.406", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "122500", "valid_best_accuracy": "84.959"} [2024-07-10 01:02:34,472][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 122500 updates [2024-07-10 01:02:34,473][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_122500.pt [2024-07-10 01:02:37,672][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_122500.pt [2024-07-10 01:02:39,828][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_122500.pt (epoch 1 @ 122500 updates, score 84.586) (writing took 5.355592811945826 seconds) [2024-07-10 01:05:43,089][train_inner][INFO] - {"epoch": 1, "update": 0.813, "loss": "1.218", "ntokens": "127.12", "acc_total": "127.12", "n_correct": "104.73", "wer_total": "127.12", "n_error": "22.38", "ppl": "2.33", "accuracy": "82.387", "wer": "17.605", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "122600", "lr": "0.000356413", "gnorm": "3.494", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "346887"} [2024-07-10 01:11:49,377][train_inner][INFO] - {"epoch": 1, "update": 0.814, "loss": "1.189", "ntokens": "125.505", "acc_total": "125.505", "n_correct": "104.04", "wer_total": "125.505", "n_error": "21.445", "ppl": "2.28", "accuracy": "82.897", "wer": "17.087", "wps": "68.5", "ups": "0.55", "wpb": "125.5", "bsz": "8", "num_updates": "122800", "lr": "0.000355347", "gnorm": "3.591", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "347253"} [2024-07-10 01:17:55,300][train_inner][INFO] - {"epoch": 1, "update": 0.816, "loss": "1.218", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "104.535", "wer_total": "126.735", "n_error": "22.195", "ppl": "2.33", "accuracy": "82.483", "wer": "17.513", "wps": "69.3", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "123000", "lr": "0.000354284", "gnorm": "3.531", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "347619"} [2024-07-10 01:24:01,528][train_inner][INFO] - {"epoch": 1, "update": 0.817, "loss": "1.208", "ntokens": "127.83", "acc_total": "127.83", "n_correct": "105.62", "wer_total": "127.83", "n_error": "22.205", "ppl": "2.31", "accuracy": "82.625", "wer": "17.371", "wps": "69.8", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "123200", "lr": "0.000353224", "gnorm": "3.608", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "347986"} [2024-07-10 01:30:07,901][train_inner][INFO] - {"epoch": 1, "update": 0.818, "loss": "1.134", "ntokens": "126.895", "acc_total": "126.895", "n_correct": "106.35", "wer_total": "126.895", "n_error": "20.54", "ppl": "2.19", "accuracy": "83.809", "wer": "16.187", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "123400", "lr": "0.000352168", "gnorm": "3.454", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "348352"} [2024-07-10 01:36:14,232][train_inner][INFO] - {"epoch": 1, "update": 0.82, "loss": "1.191", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "105.14", "wer_total": "126.82", "n_error": "21.67", "ppl": "2.28", "accuracy": "82.905", "wer": "17.087", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "123600", "lr": "0.000351114", "gnorm": "3.435", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "348718"} [2024-07-10 01:42:20,436][train_inner][INFO] - {"epoch": 1, "update": 0.821, "loss": "1.269", "ntokens": "126.595", "acc_total": "126.595", "n_correct": "103.69", "wer_total": "126.595", "n_error": "22.89", "ppl": "2.41", "accuracy": "81.907", "wer": "18.081", "wps": "69.1", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "123800", "lr": "0.000350064", "gnorm": "4.025", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "349084"} [2024-07-10 01:48:26,807][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "1.219", "ntokens": "126.215", "acc_total": "126.215", "n_correct": "104.165", "wer_total": "126.215", "n_error": "22.045", "ppl": "2.33", "accuracy": "82.53", "wer": "17.466", "wps": "68.9", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "124000", "lr": "0.000349017", "gnorm": "3.723", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "349451"} [2024-07-10 01:51:57,653][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-10 01:54:35,146][train_inner][INFO] - {"epoch": 1, "update": 0.824, "loss": "1.186", "ntokens": "127.21", "acc_total": "127.21", "n_correct": "105.625", "wer_total": "127.21", "n_error": "21.585", "ppl": "2.28", "accuracy": "83.032", "wer": "16.968", "wps": "69.1", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "124200", "lr": "0.000347973", "gnorm": "3.507", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "349819"} [2024-07-10 02:00:41,759][train_inner][INFO] - {"epoch": 1, "update": 0.825, "loss": "1.269", "ntokens": "127.645", "acc_total": "127.645", "n_correct": "104.51", "wer_total": "127.645", "n_error": "23.135", "ppl": "2.41", "accuracy": "81.876", "wer": "18.124", "wps": "69.6", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "124400", "lr": "0.000346932", "gnorm": "3.704", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "350186"} [2024-07-10 02:06:48,336][train_inner][INFO] - {"epoch": 1, "update": 0.826, "loss": "1.181", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "105.48", "wer_total": "127.375", "n_error": "21.89", "ppl": "2.27", "accuracy": "82.811", "wer": "17.185", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "124600", "lr": "0.000345894", "gnorm": "3.379", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "350552"} [2024-07-10 02:12:54,435][train_inner][INFO] - {"epoch": 1, "update": 0.828, "loss": "1.211", "ntokens": "125.23", "acc_total": "125.23", "n_correct": "104.195", "wer_total": "125.23", "n_error": "21.025", "ppl": "2.32", "accuracy": "83.203", "wer": "16.789", "wps": "68.4", "ups": "0.55", "wpb": "125.2", "bsz": "8", "num_updates": "124800", "lr": "0.00034486", "gnorm": "3.532", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "350918"} [2024-07-10 02:19:00,402][train_inner][INFO] - {"epoch": 1, "update": 0.829, "loss": "1.234", "ntokens": "125.84", "acc_total": "125.84", "n_correct": "103.81", "wer_total": "125.84", "n_error": "22.025", "ppl": "2.35", "accuracy": "82.494", "wer": "17.502", "wps": "68.8", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "125000", "lr": "0.000343828", "gnorm": "3.39", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "351284"} [2024-07-10 02:19:00,402][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 03:00:49,073][valid][INFO] - {"epoch": 1, "valid_loss": "0.973", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4328", "valid_wer_total": "18.1585", "valid_n_error": "2.72501", "valid_ppl": "1.96", "valid_accuracy": "84.99", "valid_wer": "15.007", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "125000", "valid_best_accuracy": "84.99"} [2024-07-10 03:00:49,073][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 125000 updates [2024-07-10 03:00:49,074][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_125000.pt [2024-07-10 03:00:52,224][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_125000.pt [2024-07-10 03:00:56,375][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_125000.pt (epoch 1 @ 125000 updates, score 84.99) (writing took 7.301948954002 seconds) [2024-07-10 03:07:02,636][train_inner][INFO] - {"epoch": 1, "update": 0.83, "loss": "1.196", "ntokens": "127.31", "acc_total": "127.31", "n_correct": "105.115", "wer_total": "127.31", "n_error": "22.185", "ppl": "2.29", "accuracy": "82.566", "wer": "17.426", "wps": "8.8", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "125200", "lr": "0.0003428", "gnorm": "3.564", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "354167"} [2024-07-10 03:13:09,150][train_inner][INFO] - {"epoch": 1, "update": 0.832, "loss": "1.191", "ntokens": "126.845", "acc_total": "126.845", "n_correct": "105.245", "wer_total": "126.845", "n_error": "21.6", "ppl": "2.28", "accuracy": "82.971", "wer": "17.029", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "125400", "lr": "0.000341774", "gnorm": "3.486", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "354533"} [2024-07-10 03:19:15,537][train_inner][INFO] - {"epoch": 1, "update": 0.833, "loss": "1.211", "ntokens": "126.42", "acc_total": "126.42", "n_correct": "104.16", "wer_total": "126.42", "n_error": "22.245", "ppl": "2.32", "accuracy": "82.392", "wer": "17.596", "wps": "69", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "125600", "lr": "0.000340752", "gnorm": "3.511", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "354900"} [2024-07-10 03:25:21,760][train_inner][INFO] - {"epoch": 1, "update": 0.834, "loss": "1.208", "ntokens": "127.935", "acc_total": "127.935", "n_correct": "105.735", "wer_total": "127.935", "n_error": "22.19", "ppl": "2.31", "accuracy": "82.647", "wer": "17.345", "wps": "69.9", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "125800", "lr": "0.000339733", "gnorm": "3.341", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "355266"} [2024-07-10 03:31:28,507][train_inner][INFO] - {"epoch": 1, "update": 0.836, "loss": "1.168", "ntokens": "127.085", "acc_total": "127.085", "n_correct": "105.315", "wer_total": "127.085", "n_error": "21.765", "ppl": "2.25", "accuracy": "82.87", "wer": "17.126", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "126000", "lr": "0.000338716", "gnorm": "3.55", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "355633"} [2024-07-10 03:37:34,831][train_inner][INFO] - {"epoch": 1, "update": 0.837, "loss": "1.194", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "104.795", "wer_total": "126.81", "n_error": "22.01", "ppl": "2.29", "accuracy": "82.639", "wer": "17.357", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "126200", "lr": "0.000337703", "gnorm": "3.704", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "355999"} [2024-07-10 03:43:43,697][train_inner][INFO] - {"epoch": 1, "update": 0.838, "loss": "1.154", "ntokens": "127.475", "acc_total": "127.475", "n_correct": "105.66", "wer_total": "127.475", "n_error": "21.8", "ppl": "2.23", "accuracy": "82.887", "wer": "17.101", "wps": "69.1", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "126400", "lr": "0.000336693", "gnorm": "3.686", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "356368"} [2024-07-10 03:49:52,699][train_inner][INFO] - {"epoch": 1, "update": 0.84, "loss": "1.164", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "105.91", "wer_total": "127.485", "n_error": "21.56", "ppl": "2.24", "accuracy": "83.076", "wer": "16.912", "wps": "69.1", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "126600", "lr": "0.000335686", "gnorm": "3.345", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "356737"} [2024-07-10 03:55:59,448][train_inner][INFO] - {"epoch": 1, "update": 0.841, "loss": "1.216", "ntokens": "127.415", "acc_total": "127.415", "n_correct": "105.185", "wer_total": "127.415", "n_error": "22.215", "ppl": "2.32", "accuracy": "82.553", "wer": "17.435", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "126800", "lr": "0.000334682", "gnorm": "3.713", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "357104"} [2024-07-10 04:02:06,059][train_inner][INFO] - {"epoch": 1, "update": 0.842, "loss": "1.205", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "105.175", "wer_total": "127.44", "n_error": "22.255", "ppl": "2.31", "accuracy": "82.529", "wer": "17.463", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "127000", "lr": "0.000333681", "gnorm": "3.452", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "357470"} [2024-07-10 04:08:12,441][train_inner][INFO] - {"epoch": 1, "update": 0.844, "loss": "1.2", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "104.705", "wer_total": "127.03", "n_error": "22.315", "ppl": "2.3", "accuracy": "82.425", "wer": "17.567", "wps": "69.3", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "127200", "lr": "0.000332682", "gnorm": "3.525", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "357836"} [2024-07-10 04:14:18,937][train_inner][INFO] - {"epoch": 1, "update": 0.845, "loss": "1.177", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "105.65", "wer_total": "127.095", "n_error": "21.425", "ppl": "2.26", "accuracy": "83.127", "wer": "16.857", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "127400", "lr": "0.000331687", "gnorm": "3.504", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "358203"} [2024-07-10 04:14:35,354][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-10 04:17:24,181][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 04:59:13,783][valid][INFO] - {"epoch": 1, "valid_loss": "0.985", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.452", "valid_wer_total": "18.1585", "valid_n_error": "2.70484", "valid_ppl": "1.98", "valid_accuracy": "85.095", "valid_wer": "14.896", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "127500", "valid_best_accuracy": "85.095"} [2024-07-10 04:59:13,784][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 127500 updates [2024-07-10 04:59:13,784][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_127500.pt [2024-07-10 04:59:16,989][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_127500.pt [2024-07-10 04:59:21,102][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_127500.pt (epoch 1 @ 127500 updates, score 85.095) (writing took 7.318313949042931 seconds) [2024-07-10 05:02:26,975][train_inner][INFO] - {"epoch": 1, "update": 0.846, "loss": "1.194", "ntokens": "127.88", "acc_total": "127.88", "n_correct": "105.88", "wer_total": "127.88", "n_error": "22", "ppl": "2.29", "accuracy": "82.796", "wer": "17.204", "wps": "8.9", "ups": "0.07", "wpb": "127.9", "bsz": "8", "num_updates": "127600", "lr": "0.000330695", "gnorm": "3.756", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "361091"} [2024-07-10 05:08:34,494][train_inner][INFO] - {"epoch": 1, "update": 0.848, "loss": "1.18", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "105.44", "wer_total": "127.155", "n_error": "21.71", "ppl": "2.27", "accuracy": "82.922", "wer": "17.074", "wps": "69.2", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "127800", "lr": "0.000329706", "gnorm": "3.554", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "361459"} [2024-07-10 05:14:40,934][train_inner][INFO] - {"epoch": 1, "update": 0.849, "loss": "1.177", "ntokens": "126.67", "acc_total": "126.67", "n_correct": "104.775", "wer_total": "126.67", "n_error": "21.88", "ppl": "2.26", "accuracy": "82.715", "wer": "17.273", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "128000", "lr": "0.00032872", "gnorm": "3.475", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "361825"} [2024-07-10 05:20:47,357][train_inner][INFO] - {"epoch": 1, "update": 0.85, "loss": "1.162", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "105.7", "wer_total": "126.9", "n_error": "21.19", "ppl": "2.24", "accuracy": "83.294", "wer": "16.698", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "128200", "lr": "0.000327736", "gnorm": "3.341", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "362191"} [2024-07-10 05:26:53,765][train_inner][INFO] - {"epoch": 1, "update": 0.851, "loss": "1.237", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "104.255", "wer_total": "126.76", "n_error": "22.48", "ppl": "2.36", "accuracy": "82.246", "wer": "17.734", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "128400", "lr": "0.000326756", "gnorm": "3.618", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "362558"} [2024-07-10 05:30:25,956][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-10 05:33:01,951][train_inner][INFO] - {"epoch": 1, "update": 0.853, "loss": "1.171", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "104.09", "wer_total": "126.01", "n_error": "21.915", "ppl": "2.25", "accuracy": "82.605", "wer": "17.391", "wps": "68.4", "ups": "0.54", "wpb": "126", "bsz": "8", "num_updates": "128600", "lr": "0.000325779", "gnorm": "4.448", "loss_scale": "256", "train_wall": "367", "gb_free": "6.5", "wall": "362926"} [2024-07-10 05:39:08,436][train_inner][INFO] - {"epoch": 1, "update": 0.854, "loss": "1.189", "ntokens": "127.955", "acc_total": "127.955", "n_correct": "105.855", "wer_total": "127.955", "n_error": "22.085", "ppl": "2.28", "accuracy": "82.728", "wer": "17.26", "wps": "69.8", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "128800", "lr": "0.000324804", "gnorm": "3.535", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "363292"} [2024-07-10 05:45:19,044][train_inner][INFO] - {"epoch": 1, "update": 0.855, "loss": "1.221", "ntokens": "126.505", "acc_total": "126.505", "n_correct": "103.695", "wer_total": "126.505", "n_error": "22.805", "ppl": "2.33", "accuracy": "81.969", "wer": "18.027", "wps": "68.3", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "129000", "lr": "0.000323833", "gnorm": "4.016", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "363663"} [2024-07-10 05:51:27,225][train_inner][INFO] - {"epoch": 1, "update": 0.857, "loss": "1.154", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "106.485", "wer_total": "127.64", "n_error": "21.15", "ppl": "2.23", "accuracy": "83.426", "wer": "16.57", "wps": "69.3", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "129200", "lr": "0.000322864", "gnorm": "3.356", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "364031"} [2024-07-10 05:57:33,368][train_inner][INFO] - {"epoch": 1, "update": 0.858, "loss": "1.17", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "105.335", "wer_total": "127.235", "n_error": "21.895", "ppl": "2.25", "accuracy": "82.788", "wer": "17.208", "wps": "69.5", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "129400", "lr": "0.000321898", "gnorm": "3.383", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "364397"} [2024-07-10 06:03:39,705][train_inner][INFO] - {"epoch": 1, "update": 0.859, "loss": "1.158", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "104.595", "wer_total": "126.01", "n_error": "21.405", "ppl": "2.23", "accuracy": "83.005", "wer": "16.987", "wps": "68.8", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "129600", "lr": "0.000320935", "gnorm": "3.469", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "364764"} [2024-07-10 06:09:46,135][train_inner][INFO] - {"epoch": 1, "update": 0.861, "loss": "1.17", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "105.395", "wer_total": "126.785", "n_error": "21.38", "ppl": "2.25", "accuracy": "83.129", "wer": "16.863", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "129800", "lr": "0.000319975", "gnorm": "3.614", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "365130"} [2024-07-10 06:15:52,713][train_inner][INFO] - {"epoch": 1, "update": 0.862, "loss": "1.166", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "104.94", "wer_total": "126.31", "n_error": "21.36", "ppl": "2.24", "accuracy": "83.081", "wer": "16.911", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "130000", "lr": "0.000319018", "gnorm": "3.477", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "365497"} [2024-07-10 06:15:52,714][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 06:57:42,640][valid][INFO] - {"epoch": 1, "valid_loss": "0.953", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4712", "valid_wer_total": "18.1585", "valid_n_error": "2.6862", "valid_ppl": "1.94", "valid_accuracy": "85.201", "valid_wer": "14.793", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "130000", "valid_best_accuracy": "85.201"} [2024-07-10 06:57:42,641][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 130000 updates [2024-07-10 06:57:42,641][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_130000.pt [2024-07-10 06:57:45,822][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_130000.pt [2024-07-10 06:57:50,091][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_130000.pt (epoch 1 @ 130000 updates, score 85.201) (writing took 7.449995688977651 seconds) [2024-07-10 07:03:56,125][train_inner][INFO] - {"epoch": 1, "update": 0.863, "loss": "1.207", "ntokens": "126.965", "acc_total": "126.965", "n_correct": "104.785", "wer_total": "126.965", "n_error": "22.175", "ppl": "2.31", "accuracy": "82.531", "wer": "17.465", "wps": "8.8", "ups": "0.07", "wpb": "127", "bsz": "8", "num_updates": "130200", "lr": "0.000318064", "gnorm": "3.641", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "368380"} [2024-07-10 07:10:02,412][train_inner][INFO] - {"epoch": 1, "update": 0.865, "loss": "1.176", "ntokens": "127.035", "acc_total": "127.035", "n_correct": "105.25", "wer_total": "127.035", "n_error": "21.775", "ppl": "2.26", "accuracy": "82.851", "wer": "17.141", "wps": "69.4", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "130400", "lr": "0.000317113", "gnorm": "3.471", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "368746"} [2024-07-10 07:16:08,706][train_inner][INFO] - {"epoch": 1, "update": 0.866, "loss": "1.201", "ntokens": "126.67", "acc_total": "126.67", "n_correct": "104.885", "wer_total": "126.67", "n_error": "21.78", "ppl": "2.3", "accuracy": "82.802", "wer": "17.194", "wps": "69.2", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "130600", "lr": "0.000316164", "gnorm": "3.504", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "369113"} [2024-07-10 07:22:15,477][train_inner][INFO] - {"epoch": 1, "update": 0.867, "loss": "1.147", "ntokens": "127.495", "acc_total": "127.495", "n_correct": "106.445", "wer_total": "127.495", "n_error": "21.035", "ppl": "2.22", "accuracy": "83.49", "wer": "16.499", "wps": "69.5", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "130800", "lr": "0.000315218", "gnorm": "3.381", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "369480"} [2024-07-10 07:28:21,910][train_inner][INFO] - {"epoch": 1, "update": 0.869, "loss": "1.144", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "105.875", "wer_total": "126.855", "n_error": "20.96", "ppl": "2.21", "accuracy": "83.461", "wer": "16.523", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "131000", "lr": "0.000314275", "gnorm": "3.383", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "369846"} [2024-07-10 07:34:28,659][train_inner][INFO] - {"epoch": 1, "update": 0.87, "loss": "1.184", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "105.295", "wer_total": "127.245", "n_error": "21.94", "ppl": "2.27", "accuracy": "82.75", "wer": "17.242", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "131200", "lr": "0.000313335", "gnorm": "3.696", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "370213"} [2024-07-10 07:40:35,128][train_inner][INFO] - {"epoch": 1, "update": 0.871, "loss": "1.186", "ntokens": "126.06", "acc_total": "126.06", "n_correct": "104.63", "wer_total": "126.06", "n_error": "21.43", "ppl": "2.27", "accuracy": "83", "wer": "17", "wps": "68.8", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "131400", "lr": "0.000312398", "gnorm": "3.678", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "370579"} [2024-07-10 07:46:41,572][train_inner][INFO] - {"epoch": 1, "update": 0.873, "loss": "1.157", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "105.55", "wer_total": "126.865", "n_error": "21.28", "ppl": "2.23", "accuracy": "83.199", "wer": "16.774", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "131600", "lr": "0.000311464", "gnorm": "3.556", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "370946"} [2024-07-10 07:52:48,028][train_inner][INFO] - {"epoch": 1, "update": 0.874, "loss": "1.159", "ntokens": "128.35", "acc_total": "128.35", "n_correct": "106.665", "wer_total": "128.35", "n_error": "21.685", "ppl": "2.23", "accuracy": "83.105", "wer": "16.895", "wps": "70", "ups": "0.55", "wpb": "128.3", "bsz": "8", "num_updates": "131800", "lr": "0.000310532", "gnorm": "3.238", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "371312"} [2024-07-10 07:58:54,393][train_inner][INFO] - {"epoch": 1, "update": 0.875, "loss": "1.098", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "106.725", "wer_total": "127.2", "n_error": "20.475", "ppl": "2.14", "accuracy": "83.903", "wer": "16.097", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "132000", "lr": "0.000309603", "gnorm": "3.461", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "371678"} [2024-07-10 08:05:01,264][train_inner][INFO] - {"epoch": 1, "update": 0.877, "loss": "1.136", "ntokens": "128.42", "acc_total": "128.42", "n_correct": "106.16", "wer_total": "128.42", "n_error": "22.26", "ppl": "2.2", "accuracy": "82.666", "wer": "17.334", "wps": "70", "ups": "0.55", "wpb": "128.4", "bsz": "8", "num_updates": "132200", "lr": "0.000308677", "gnorm": "3.264", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "372045"} [2024-07-10 08:11:07,861][train_inner][INFO] - {"epoch": 1, "update": 0.878, "loss": "1.161", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "105.45", "wer_total": "126.945", "n_error": "21.485", "ppl": "2.24", "accuracy": "83.067", "wer": "16.925", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "132400", "lr": "0.000307754", "gnorm": "3.364", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "372412"} [2024-07-10 08:14:11,054][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 08:55:59,643][valid][INFO] - {"epoch": 1, "valid_loss": "0.947", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.5421", "valid_wer_total": "18.1585", "valid_n_error": "2.61534", "valid_ppl": "1.93", "valid_accuracy": "85.591", "valid_wer": "14.403", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "132500", "valid_best_accuracy": "85.591"} [2024-07-10 08:55:59,644][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 132500 updates [2024-07-10 08:55:59,644][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_132500.pt [2024-07-10 08:56:02,834][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_132500.pt [2024-07-10 08:56:06,947][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_132500.pt (epoch 1 @ 132500 updates, score 85.591) (writing took 7.302950576995499 seconds) [2024-07-10 08:59:09,846][train_inner][INFO] - {"epoch": 1, "update": 0.879, "loss": "1.18", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "104.75", "wer_total": "126.58", "n_error": "21.805", "ppl": "2.27", "accuracy": "82.754", "wer": "17.226", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "132600", "lr": "0.000306833", "gnorm": "3.41", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "375294"} [2024-07-10 09:05:16,292][train_inner][INFO] - {"epoch": 1, "update": 0.881, "loss": "1.168", "ntokens": "126.445", "acc_total": "126.445", "n_correct": "105.425", "wer_total": "126.445", "n_error": "21.015", "ppl": "2.25", "accuracy": "83.376", "wer": "16.62", "wps": "69", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "132800", "lr": "0.000305915", "gnorm": "3.178", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "375660"} [2024-07-10 09:11:22,354][train_inner][INFO] - {"epoch": 1, "update": 0.882, "loss": "1.115", "ntokens": "126.65", "acc_total": "126.65", "n_correct": "106.015", "wer_total": "126.65", "n_error": "20.625", "ppl": "2.17", "accuracy": "83.707", "wer": "16.285", "wps": "69.2", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "133000", "lr": "0.000305", "gnorm": "3.177", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "376026"} [2024-07-10 09:17:28,881][train_inner][INFO] - {"epoch": 1, "update": 0.883, "loss": "1.105", "ntokens": "127.31", "acc_total": "127.31", "n_correct": "107.155", "wer_total": "127.31", "n_error": "20.14", "ppl": "2.15", "accuracy": "84.169", "wer": "15.82", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "133200", "lr": "0.000304088", "gnorm": "3.048", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "376393"} [2024-07-10 09:23:35,686][train_inner][INFO] - {"epoch": 1, "update": 0.885, "loss": "1.152", "ntokens": "128.08", "acc_total": "128.08", "n_correct": "106.855", "wer_total": "128.08", "n_error": "21.205", "ppl": "2.22", "accuracy": "83.428", "wer": "16.556", "wps": "69.8", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "133400", "lr": "0.000303178", "gnorm": "3.353", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "376760"} [2024-07-10 09:29:42,039][train_inner][INFO] - {"epoch": 1, "update": 0.886, "loss": "1.173", "ntokens": "126.24", "acc_total": "126.24", "n_correct": "105.35", "wer_total": "126.24", "n_error": "20.88", "ppl": "2.25", "accuracy": "83.452", "wer": "16.54", "wps": "68.9", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "133600", "lr": "0.000302271", "gnorm": "3.431", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "377126"} [2024-07-10 09:30:47,878][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-10 09:35:50,226][train_inner][INFO] - {"epoch": 1, "update": 0.887, "loss": "1.13", "ntokens": "126.375", "acc_total": "126.375", "n_correct": "105.635", "wer_total": "126.375", "n_error": "20.74", "ppl": "2.19", "accuracy": "83.589", "wer": "16.411", "wps": "68.6", "ups": "0.54", "wpb": "126.4", "bsz": "8", "num_updates": "133800", "lr": "0.000301367", "gnorm": "3.351", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "377494"} [2024-07-10 09:41:56,914][train_inner][INFO] - {"epoch": 1, "update": 0.889, "loss": "1.167", "ntokens": "127.825", "acc_total": "127.825", "n_correct": "106.375", "wer_total": "127.825", "n_error": "21.445", "ppl": "2.24", "accuracy": "83.219", "wer": "16.777", "wps": "69.7", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "134000", "lr": "0.000300466", "gnorm": "3.399", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "377861"} [2024-07-10 09:48:03,723][train_inner][INFO] - {"epoch": 1, "update": 0.89, "loss": "1.166", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "105.62", "wer_total": "126.775", "n_error": "21.155", "ppl": "2.24", "accuracy": "83.313", "wer": "16.687", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "134200", "lr": "0.000299567", "gnorm": "3.2", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "378228"} [2024-07-10 09:54:12,297][train_inner][INFO] - {"epoch": 1, "update": 0.891, "loss": "1.165", "ntokens": "126.48", "acc_total": "126.48", "n_correct": "105.105", "wer_total": "126.48", "n_error": "21.365", "ppl": "2.24", "accuracy": "83.1", "wer": "16.892", "wps": "68.6", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "134400", "lr": "0.000298671", "gnorm": "3.245", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "378596"} [2024-07-10 10:00:24,167][train_inner][INFO] - {"epoch": 1, "update": 0.893, "loss": "1.132", "ntokens": "127.22", "acc_total": "127.22", "n_correct": "106.33", "wer_total": "127.22", "n_error": "20.85", "ppl": "2.19", "accuracy": "83.58", "wer": "16.389", "wps": "68.4", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "134600", "lr": "0.000297777", "gnorm": "3.507", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "378968"} [2024-07-10 10:06:30,741][train_inner][INFO] - {"epoch": 1, "update": 0.894, "loss": "1.133", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "106.22", "wer_total": "126.795", "n_error": "20.565", "ppl": "2.19", "accuracy": "83.773", "wer": "16.219", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "134800", "lr": "0.000296887", "gnorm": "3.434", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "379335"} [2024-07-10 10:12:37,573][train_inner][INFO] - {"epoch": 1, "update": 0.895, "loss": "1.147", "ntokens": "127.445", "acc_total": "127.445", "n_correct": "106.635", "wer_total": "127.445", "n_error": "20.8", "ppl": "2.21", "accuracy": "83.671", "wer": "16.321", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "135000", "lr": "0.000295999", "gnorm": "3.26", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "379702"} [2024-07-10 10:12:37,573][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 10:54:28,908][valid][INFO] - {"epoch": 1, "valid_loss": "0.927", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.6132", "valid_wer_total": "18.1585", "valid_n_error": "2.54469", "valid_ppl": "1.9", "valid_accuracy": "85.983", "valid_wer": "14.014", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "135000", "valid_best_accuracy": "85.983"} [2024-07-10 10:54:28,908][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 135000 updates [2024-07-10 10:54:28,909][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_135000.pt [2024-07-10 10:54:32,218][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_135000.pt [2024-07-10 10:54:36,444][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_135000.pt (epoch 1 @ 135000 updates, score 85.983) (writing took 7.536114320973866 seconds) [2024-07-10 11:00:42,620][train_inner][INFO] - {"epoch": 1, "update": 0.897, "loss": "1.125", "ntokens": "126.91", "acc_total": "126.91", "n_correct": "105.81", "wer_total": "126.91", "n_error": "21.08", "ppl": "2.18", "accuracy": "83.374", "wer": "16.61", "wps": "8.8", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "135200", "lr": "0.000295113", "gnorm": "3.342", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "382587"} [2024-07-10 11:06:49,337][train_inner][INFO] - {"epoch": 1, "update": 0.898, "loss": "1.125", "ntokens": "127.08", "acc_total": "127.08", "n_correct": "106.01", "wer_total": "127.08", "n_error": "21.06", "ppl": "2.18", "accuracy": "83.42", "wer": "16.572", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "135400", "lr": "0.000294231", "gnorm": "3.34", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "382953"} [2024-07-10 11:06:51,118][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-10 11:12:57,650][train_inner][INFO] - {"epoch": 1, "update": 0.899, "loss": "1.154", "ntokens": "124.715", "acc_total": "124.715", "n_correct": "104.235", "wer_total": "124.715", "n_error": "20.48", "ppl": "2.23", "accuracy": "83.579", "wer": "16.421", "wps": "67.7", "ups": "0.54", "wpb": "124.7", "bsz": "8", "num_updates": "135600", "lr": "0.00029335", "gnorm": "3.688", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "383322"} [2024-07-10 11:19:04,157][train_inner][INFO] - {"epoch": 1, "update": 0.901, "loss": "1.111", "ntokens": "126.8", "acc_total": "126.8", "n_correct": "106.555", "wer_total": "126.8", "n_error": "20.23", "ppl": "2.16", "accuracy": "84.034", "wer": "15.954", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "135800", "lr": "0.000292473", "gnorm": "3.253", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "383688"} [2024-07-10 11:25:11,239][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "1.067", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "107.735", "wer_total": "127.785", "n_error": "20.03", "ppl": "2.1", "accuracy": "84.31", "wer": "15.675", "wps": "69.6", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "136000", "lr": "0.000291598", "gnorm": "3.143", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "384055"} [2024-07-10 11:31:17,748][train_inner][INFO] - {"epoch": 1, "update": 0.903, "loss": "1.143", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "105.78", "wer_total": "126.295", "n_error": "20.515", "ppl": "2.21", "accuracy": "83.756", "wer": "16.244", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "136200", "lr": "0.000290726", "gnorm": "3.368", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "384422"} [2024-07-10 11:37:24,498][train_inner][INFO] - {"epoch": 1, "update": 0.905, "loss": "1.059", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "107.355", "wer_total": "126.975", "n_error": "19.61", "ppl": "2.08", "accuracy": "84.548", "wer": "15.444", "wps": "69.2", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "136400", "lr": "0.000289856", "gnorm": "3.665", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "384789"} [2024-07-10 11:43:31,353][train_inner][INFO] - {"epoch": 1, "update": 0.906, "loss": "1.127", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "106.7", "wer_total": "127.26", "n_error": "20.555", "ppl": "2.18", "accuracy": "83.844", "wer": "16.152", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "136600", "lr": "0.000288989", "gnorm": "3.199", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "385155"} [2024-07-10 11:49:38,081][train_inner][INFO] - {"epoch": 1, "update": 0.907, "loss": "1.132", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "106.06", "wer_total": "126.675", "n_error": "20.605", "ppl": "2.19", "accuracy": "83.726", "wer": "16.266", "wps": "69.1", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "136800", "lr": "0.000288125", "gnorm": "3.263", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "385522"} [2024-07-10 11:55:45,044][train_inner][INFO] - {"epoch": 1, "update": 0.909, "loss": "1.109", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "106.11", "wer_total": "126.255", "n_error": "20.125", "ppl": "2.16", "accuracy": "84.044", "wer": "15.94", "wps": "68.8", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "137000", "lr": "0.000287263", "gnorm": "3.127", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "385889"} [2024-07-10 12:01:52,189][train_inner][INFO] - {"epoch": 1, "update": 0.91, "loss": "1.195", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "105.43", "wer_total": "126.735", "n_error": "21.28", "ppl": "2.29", "accuracy": "83.189", "wer": "16.791", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "137200", "lr": "0.000286404", "gnorm": "3.634", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "386256"} [2024-07-10 12:07:59,245][train_inner][INFO] - {"epoch": 1, "update": 0.911, "loss": "1.132", "ntokens": "127.16", "acc_total": "127.16", "n_correct": "106.225", "wer_total": "127.16", "n_error": "20.925", "ppl": "2.19", "accuracy": "83.536", "wer": "16.456", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "137400", "lr": "0.000285547", "gnorm": "3.385", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "386623"} [2024-07-10 12:11:02,662][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 12:53:28,425][valid][INFO] - {"epoch": 1, "valid_loss": "0.906", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.6633", "valid_wer_total": "18.1585", "valid_n_error": "2.49404", "valid_ppl": "1.87", "valid_accuracy": "86.259", "valid_wer": "13.735", "valid_wps": "171.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "137500", "valid_best_accuracy": "86.259"} [2024-07-10 12:53:28,425][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 137500 updates [2024-07-10 12:53:28,425][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_137500.pt [2024-07-10 12:53:31,613][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_137500.pt [2024-07-10 12:53:35,823][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_137500.pt (epoch 1 @ 137500 updates, score 86.259) (writing took 7.397306075086817 seconds) [2024-07-10 12:56:39,068][train_inner][INFO] - {"epoch": 1, "update": 0.912, "loss": "1.087", "ntokens": "126.435", "acc_total": "126.435", "n_correct": "106.655", "wer_total": "126.435", "n_error": "19.775", "ppl": "2.12", "accuracy": "84.356", "wer": "15.64", "wps": "8.7", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "137600", "lr": "0.000284693", "gnorm": "3.36", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "389543"} [2024-07-10 13:02:46,340][train_inner][INFO] - {"epoch": 1, "update": 0.914, "loss": "1.083", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "106.22", "wer_total": "126.295", "n_error": "20.055", "ppl": "2.12", "accuracy": "84.105", "wer": "15.879", "wps": "68.8", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "137800", "lr": "0.000283841", "gnorm": "3.194", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "389910"} [2024-07-10 13:02:48,103][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-10 13:08:55,457][train_inner][INFO] - {"epoch": 1, "update": 0.915, "loss": "1.08", "ntokens": "128.15", "acc_total": "128.15", "n_correct": "107.825", "wer_total": "128.15", "n_error": "20.32", "ppl": "2.11", "accuracy": "84.14", "wer": "15.856", "wps": "69.4", "ups": "0.54", "wpb": "128.2", "bsz": "8", "num_updates": "138000", "lr": "0.000282992", "gnorm": "3.297", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "390280"} [2024-07-10 13:15:02,248][train_inner][INFO] - {"epoch": 1, "update": 0.916, "loss": "1.123", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "106.85", "wer_total": "127.375", "n_error": "20.515", "ppl": "2.18", "accuracy": "83.886", "wer": "16.106", "wps": "69.5", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "138200", "lr": "0.000282146", "gnorm": "3.223", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "390646"} [2024-07-10 13:21:09,188][train_inner][INFO] - {"epoch": 1, "update": 0.918, "loss": "1.105", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "106.82", "wer_total": "126.78", "n_error": "19.95", "ppl": "2.15", "accuracy": "84.256", "wer": "15.736", "wps": "69.1", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "138400", "lr": "0.000281302", "gnorm": "3.24", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "391013"} [2024-07-10 13:27:15,764][train_inner][INFO] - {"epoch": 1, "update": 0.919, "loss": "1.108", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "106.675", "wer_total": "127.145", "n_error": "20.45", "ppl": "2.16", "accuracy": "83.9", "wer": "16.084", "wps": "69.4", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "138600", "lr": "0.00028046", "gnorm": "3.393", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "391380"} [2024-07-10 13:33:22,851][train_inner][INFO] - {"epoch": 1, "update": 0.92, "loss": "1.075", "ntokens": "126.8", "acc_total": "126.8", "n_correct": "107.3", "wer_total": "126.8", "n_error": "19.485", "ppl": "2.11", "accuracy": "84.621", "wer": "15.367", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "138800", "lr": "0.000279621", "gnorm": "3.292", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "391747"} [2024-07-10 13:39:29,480][train_inner][INFO] - {"epoch": 1, "update": 0.922, "loss": "1.091", "ntokens": "125.655", "acc_total": "125.655", "n_correct": "105.965", "wer_total": "125.655", "n_error": "19.685", "ppl": "2.13", "accuracy": "84.33", "wer": "15.666", "wps": "68.5", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "139000", "lr": "0.000278785", "gnorm": "3.189", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "392114"} [2024-07-10 13:45:36,748][train_inner][INFO] - {"epoch": 1, "update": 0.923, "loss": "1.107", "ntokens": "127.765", "acc_total": "127.765", "n_correct": "107.38", "wer_total": "127.765", "n_error": "20.375", "ppl": "2.15", "accuracy": "84.045", "wer": "15.947", "wps": "69.6", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "139200", "lr": "0.000277951", "gnorm": "3.438", "loss_scale": "256", "train_wall": "367", "gb_free": "6.5", "wall": "392481"} [2024-07-10 13:51:43,993][train_inner][INFO] - {"epoch": 1, "update": 0.924, "loss": "1.147", "ntokens": "125.665", "acc_total": "125.665", "n_correct": "104.71", "wer_total": "125.665", "n_error": "20.94", "ppl": "2.21", "accuracy": "83.325", "wer": "16.663", "wps": "68.4", "ups": "0.54", "wpb": "125.7", "bsz": "8", "num_updates": "139400", "lr": "0.00027712", "gnorm": "3.34", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "392848"} [2024-07-10 13:57:52,350][train_inner][INFO] - {"epoch": 1, "update": 0.926, "loss": "1.141", "ntokens": "127.165", "acc_total": "127.165", "n_correct": "105.825", "wer_total": "127.165", "n_error": "21.33", "ppl": "2.21", "accuracy": "83.219", "wer": "16.773", "wps": "69", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "139600", "lr": "0.000276291", "gnorm": "3.674", "loss_scale": "256", "train_wall": "367", "gb_free": "6.5", "wall": "393216"} [2024-07-10 14:03:59,620][train_inner][INFO] - {"epoch": 1, "update": 0.927, "loss": "1.086", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "106.27", "wer_total": "126.68", "n_error": "20.405", "ppl": "2.12", "accuracy": "83.889", "wer": "16.108", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "139800", "lr": "0.000275464", "gnorm": "3.365", "loss_scale": "256", "train_wall": "367", "gb_free": "6.5", "wall": "393584"} [2024-07-10 14:10:06,654][train_inner][INFO] - {"epoch": 1, "update": 0.928, "loss": "1.097", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "107.16", "wer_total": "127.42", "n_error": "20.25", "ppl": "2.14", "accuracy": "84.1", "wer": "15.892", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "140000", "lr": "0.00027464", "gnorm": "3.366", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "393951"} [2024-07-10 14:10:06,655][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 14:52:00,324][valid][INFO] - {"epoch": 1, "valid_loss": "0.932", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.6148", "valid_wer_total": "18.1585", "valid_n_error": "2.54277", "valid_ppl": "1.91", "valid_accuracy": "85.992", "valid_wer": "14.003", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "140000", "valid_best_accuracy": "86.259"} [2024-07-10 14:52:00,324][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 140000 updates [2024-07-10 14:52:00,325][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_140000.pt [2024-07-10 14:52:03,486][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_140000.pt [2024-07-10 14:52:05,605][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_140000.pt (epoch 1 @ 140000 updates, score 85.992) (writing took 5.280085590900853 seconds) [2024-07-10 14:58:12,660][train_inner][INFO] - {"epoch": 1, "update": 0.93, "loss": "1.098", "ntokens": "127.08", "acc_total": "127.08", "n_correct": "106.965", "wer_total": "127.08", "n_error": "20.11", "ppl": "2.14", "accuracy": "84.171", "wer": "15.825", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "140200", "lr": "0.000273819", "gnorm": "3.521", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "396837"} [2024-07-10 15:04:19,745][train_inner][INFO] - {"epoch": 1, "update": 0.931, "loss": "1.101", "ntokens": "127.24", "acc_total": "127.24", "n_correct": "106.545", "wer_total": "127.24", "n_error": "20.69", "ppl": "2.15", "accuracy": "83.735", "wer": "16.261", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "140400", "lr": "0.000273", "gnorm": "3.381", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "397204"} [2024-07-10 15:10:26,951][train_inner][INFO] - {"epoch": 1, "update": 0.932, "loss": "1.076", "ntokens": "127.61", "acc_total": "127.61", "n_correct": "107.53", "wer_total": "127.61", "n_error": "20.075", "ppl": "2.11", "accuracy": "84.265", "wer": "15.732", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "140600", "lr": "0.000272183", "gnorm": "2.967", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "397571"} [2024-07-10 15:13:12,056][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-10 15:16:35,808][train_inner][INFO] - {"epoch": 1, "update": 0.934, "loss": "1.097", "ntokens": "126.955", "acc_total": "126.955", "n_correct": "106.86", "wer_total": "126.955", "n_error": "20.09", "ppl": "2.14", "accuracy": "84.172", "wer": "15.825", "wps": "68.8", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "140800", "lr": "0.000271369", "gnorm": "3.319", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "397940"} [2024-07-10 15:22:43,079][train_inner][INFO] - {"epoch": 1, "update": 0.935, "loss": "1.125", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "106.25", "wer_total": "126.81", "n_error": "20.555", "ppl": "2.18", "accuracy": "83.787", "wer": "16.209", "wps": "69.1", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "141000", "lr": "0.000270557", "gnorm": "3.417", "loss_scale": "256", "train_wall": "367", "gb_free": "6.5", "wall": "398307"} [2024-07-10 15:28:51,483][train_inner][INFO] - {"epoch": 1, "update": 0.936, "loss": "1.123", "ntokens": "126.73", "acc_total": "126.73", "n_correct": "106.09", "wer_total": "126.73", "n_error": "20.625", "ppl": "2.18", "accuracy": "83.713", "wer": "16.275", "wps": "68.8", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "141200", "lr": "0.000269748", "gnorm": "3.118", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "398676"} [2024-07-10 15:34:58,454][train_inner][INFO] - {"epoch": 1, "update": 0.938, "loss": "1.062", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "107.055", "wer_total": "126.855", "n_error": "19.785", "ppl": "2.09", "accuracy": "84.392", "wer": "15.597", "wps": "69.1", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "141400", "lr": "0.000268941", "gnorm": "3.272", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "399043"} [2024-07-10 15:41:05,865][train_inner][INFO] - {"epoch": 1, "update": 0.939, "loss": "1.086", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "106.36", "wer_total": "126.835", "n_error": "20.47", "ppl": "2.12", "accuracy": "83.857", "wer": "16.139", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "141600", "lr": "0.000268136", "gnorm": "3.421", "loss_scale": "256", "train_wall": "367", "gb_free": "6.5", "wall": "399410"} [2024-07-10 15:47:12,824][train_inner][INFO] - {"epoch": 1, "update": 0.94, "loss": "1.114", "ntokens": "126.64", "acc_total": "126.64", "n_correct": "106.135", "wer_total": "126.64", "n_error": "20.485", "ppl": "2.17", "accuracy": "83.808", "wer": "16.176", "wps": "69", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "141800", "lr": "0.000267334", "gnorm": "3.19", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "399777"} [2024-07-10 15:53:19,826][train_inner][INFO] - {"epoch": 1, "update": 0.942, "loss": "1.111", "ntokens": "126.12", "acc_total": "126.12", "n_correct": "105.575", "wer_total": "126.12", "n_error": "20.54", "ppl": "2.16", "accuracy": "83.71", "wer": "16.286", "wps": "68.7", "ups": "0.54", "wpb": "126.1", "bsz": "8", "num_updates": "142000", "lr": "0.000266535", "gnorm": "3.199", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "400144"} [2024-07-10 15:59:26,949][train_inner][INFO] - {"epoch": 1, "update": 0.943, "loss": "1.096", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "107.48", "wer_total": "127.41", "n_error": "19.92", "ppl": "2.14", "accuracy": "84.358", "wer": "15.635", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "142200", "lr": "0.000265737", "gnorm": "3.478", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "400511"} [2024-07-10 16:05:34,110][train_inner][INFO] - {"epoch": 1, "update": 0.944, "loss": "1.084", "ntokens": "127.56", "acc_total": "127.56", "n_correct": "107.59", "wer_total": "127.56", "n_error": "19.97", "ppl": "2.12", "accuracy": "84.345", "wer": "15.655", "wps": "69.5", "ups": "0.54", "wpb": "127.6", "bsz": "8", "num_updates": "142400", "lr": "0.000264943", "gnorm": "3.219", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "400878"} [2024-07-10 16:08:37,693][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 16:50:29,720][valid][INFO] - {"epoch": 1, "valid_loss": "0.898", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.6592", "valid_wer_total": "18.1585", "valid_n_error": "2.49837", "valid_ppl": "1.86", "valid_accuracy": "86.236", "valid_wer": "13.759", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "142500", "valid_best_accuracy": "86.259"} [2024-07-10 16:50:29,721][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 142500 updates [2024-07-10 16:50:29,721][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_142500.pt [2024-07-10 16:50:32,927][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_142500.pt [2024-07-10 16:50:35,032][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_142500.pt (epoch 1 @ 142500 updates, score 86.236) (writing took 5.311112678027712 seconds) [2024-07-10 16:53:38,375][train_inner][INFO] - {"epoch": 1, "update": 0.946, "loss": "1.118", "ntokens": "126.42", "acc_total": "126.42", "n_correct": "106.31", "wer_total": "126.42", "n_error": "20.095", "ppl": "2.17", "accuracy": "84.093", "wer": "15.895", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "142600", "lr": "0.00026415", "gnorm": "3.369", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "403762"} [2024-07-10 16:59:45,688][train_inner][INFO] - {"epoch": 1, "update": 0.947, "loss": "1.049", "ntokens": "127.3", "acc_total": "127.3", "n_correct": "107.57", "wer_total": "127.3", "n_error": "19.725", "ppl": "2.07", "accuracy": "84.501", "wer": "15.495", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "142800", "lr": "0.00026336", "gnorm": "3.056", "loss_scale": "512", "train_wall": "367", "gb_free": "6.5", "wall": "404130"} [2024-07-10 17:05:52,642][train_inner][INFO] - {"epoch": 1, "update": 0.948, "loss": "1.082", "ntokens": "126.545", "acc_total": "126.545", "n_correct": "106.575", "wer_total": "126.545", "n_error": "19.965", "ppl": "2.12", "accuracy": "84.219", "wer": "15.777", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "143000", "lr": "0.000262572", "gnorm": "3.276", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "404497"} [2024-07-10 17:11:58,973][train_inner][INFO] - {"epoch": 1, "update": 0.95, "loss": "1.087", "ntokens": "126.74", "acc_total": "126.74", "n_correct": "106.64", "wer_total": "126.74", "n_error": "20.09", "ppl": "2.12", "accuracy": "84.141", "wer": "15.851", "wps": "69.2", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "143200", "lr": "0.000261787", "gnorm": "3.186", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "404863"} [2024-07-10 17:18:05,772][train_inner][INFO] - {"epoch": 1, "update": 0.951, "loss": "1.084", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "107.56", "wer_total": "127.33", "n_error": "19.77", "ppl": "2.12", "accuracy": "84.473", "wer": "15.527", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "143400", "lr": "0.000261004", "gnorm": "3.423", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "405230"} [2024-07-10 17:24:12,546][train_inner][INFO] - {"epoch": 1, "update": 0.952, "loss": "1.13", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "105.57", "wer_total": "125.9", "n_error": "20.325", "ppl": "2.19", "accuracy": "83.852", "wer": "16.144", "wps": "68.7", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "143600", "lr": "0.000260223", "gnorm": "3.401", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "405597"} [2024-07-10 17:28:01,707][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-10 17:30:21,127][train_inner][INFO] - {"epoch": 1, "update": 0.954, "loss": "1.095", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "106.28", "wer_total": "126.18", "n_error": "19.89", "ppl": "2.14", "accuracy": "84.229", "wer": "15.763", "wps": "68.5", "ups": "0.54", "wpb": "126.2", "bsz": "8", "num_updates": "143800", "lr": "0.000259444", "gnorm": "3.285", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "405965"} [2024-07-10 17:36:27,756][train_inner][INFO] - {"epoch": 1, "update": 0.955, "loss": "1.031", "ntokens": "126.38", "acc_total": "126.38", "n_correct": "107.015", "wer_total": "126.38", "n_error": "19.36", "ppl": "2.04", "accuracy": "84.677", "wer": "15.319", "wps": "68.9", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "144000", "lr": "0.000258668", "gnorm": "3.281", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "406332"} [2024-07-10 17:42:34,567][train_inner][INFO] - {"epoch": 1, "update": 0.956, "loss": "1.132", "ntokens": "125.985", "acc_total": "125.985", "n_correct": "105.635", "wer_total": "125.985", "n_error": "20.34", "ppl": "2.19", "accuracy": "83.847", "wer": "16.145", "wps": "68.7", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "144200", "lr": "0.000257895", "gnorm": "3.438", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "406699"} [2024-07-10 17:48:41,011][train_inner][INFO] - {"epoch": 1, "update": 0.958, "loss": "1.084", "ntokens": "127.865", "acc_total": "127.865", "n_correct": "108.035", "wer_total": "127.865", "n_error": "19.81", "ppl": "2.12", "accuracy": "84.491", "wer": "15.493", "wps": "69.8", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "144400", "lr": "0.000257123", "gnorm": "3.446", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "407065"} [2024-07-10 17:54:49,260][train_inner][INFO] - {"epoch": 1, "update": 0.959, "loss": "1.082", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "106.845", "wer_total": "126.77", "n_error": "19.915", "ppl": "2.12", "accuracy": "84.283", "wer": "15.71", "wps": "68.9", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "144600", "lr": "0.000256354", "gnorm": "3.442", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "407433"} [2024-07-10 18:00:55,946][train_inner][INFO] - {"epoch": 1, "update": 0.96, "loss": "1.168", "ntokens": "126.89", "acc_total": "126.89", "n_correct": "106.08", "wer_total": "126.89", "n_error": "20.805", "ppl": "2.25", "accuracy": "83.6", "wer": "16.396", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "144800", "lr": "0.000255587", "gnorm": "3.456", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "407800"} [2024-07-10 18:07:02,645][train_inner][INFO] - {"epoch": 1, "update": 0.962, "loss": "1.116", "ntokens": "126.04", "acc_total": "126.04", "n_correct": "105.52", "wer_total": "126.04", "n_error": "20.51", "ppl": "2.17", "accuracy": "83.719", "wer": "16.273", "wps": "68.7", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "145000", "lr": "0.000254823", "gnorm": "3.502", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "408167"} [2024-07-10 18:07:02,646][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 18:48:54,814][valid][INFO] - {"epoch": 1, "valid_loss": "0.881", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.6444", "valid_wer_total": "18.1585", "valid_n_error": "2.51284", "valid_ppl": "1.84", "valid_accuracy": "86.155", "valid_wer": "13.838", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "145000", "valid_best_accuracy": "86.259"} [2024-07-10 18:48:54,815][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 145000 updates [2024-07-10 18:48:54,815][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_145000.pt [2024-07-10 18:48:57,973][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_145000.pt [2024-07-10 18:48:59,946][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_145000.pt (epoch 1 @ 145000 updates, score 86.155) (writing took 5.131747712963261 seconds) [2024-07-10 18:55:06,502][train_inner][INFO] - {"epoch": 1, "update": 0.963, "loss": "1.061", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "106.64", "wer_total": "126.835", "n_error": "20.175", "ppl": "2.09", "accuracy": "84.078", "wer": "15.906", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "145200", "lr": "0.000254061", "gnorm": "3.195", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "411051"} [2024-07-10 19:01:13,191][train_inner][INFO] - {"epoch": 1, "update": 0.964, "loss": "1.097", "ntokens": "126.35", "acc_total": "126.35", "n_correct": "106.395", "wer_total": "126.35", "n_error": "19.95", "ppl": "2.14", "accuracy": "84.207", "wer": "15.789", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "145400", "lr": "0.000253301", "gnorm": "3.829", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "411417"} [2024-07-10 19:07:19,888][train_inner][INFO] - {"epoch": 1, "update": 0.966, "loss": "1.006", "ntokens": "126.175", "acc_total": "126.175", "n_correct": "107.575", "wer_total": "126.175", "n_error": "18.595", "ppl": "2.01", "accuracy": "85.259", "wer": "14.737", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "145600", "lr": "0.000252543", "gnorm": "3.162", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "411784"} [2024-07-10 19:13:26,845][train_inner][INFO] - {"epoch": 1, "update": 0.967, "loss": "1.047", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "108.255", "wer_total": "127.785", "n_error": "19.53", "ppl": "2.07", "accuracy": "84.717", "wer": "15.283", "wps": "69.6", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "145800", "lr": "0.000251787", "gnorm": "3.413", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "412151"} [2024-07-10 19:19:33,714][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "1.064", "ntokens": "127.31", "acc_total": "127.31", "n_correct": "107.31", "wer_total": "127.31", "n_error": "20", "ppl": "2.09", "accuracy": "84.29", "wer": "15.71", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "146000", "lr": "0.000251034", "gnorm": "3.285", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "412518"} [2024-07-10 19:25:41,979][train_inner][INFO] - {"epoch": 1, "update": 0.97, "loss": "1.069", "ntokens": "126.35", "acc_total": "126.35", "n_correct": "106.515", "wer_total": "126.35", "n_error": "19.83", "ppl": "2.1", "accuracy": "84.302", "wer": "15.694", "wps": "68.6", "ups": "0.54", "wpb": "126.3", "bsz": "8", "num_updates": "146200", "lr": "0.000250283", "gnorm": "3.617", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "412886"} [2024-07-10 19:31:48,634][train_inner][INFO] - {"epoch": 1, "update": 0.971, "loss": "1.119", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "105.83", "wer_total": "126.265", "n_error": "20.43", "ppl": "2.17", "accuracy": "83.816", "wer": "16.18", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "146400", "lr": "0.000249535", "gnorm": "3.743", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "413253"} [2024-07-10 19:37:55,719][train_inner][INFO] - {"epoch": 1, "update": 0.972, "loss": "1.079", "ntokens": "127.81", "acc_total": "127.81", "n_correct": "107.51", "wer_total": "127.81", "n_error": "20.295", "ppl": "2.11", "accuracy": "84.117", "wer": "15.879", "wps": "69.6", "ups": "0.54", "wpb": "127.8", "bsz": "8", "num_updates": "146600", "lr": "0.000248788", "gnorm": "3.46", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "413620"} [2024-07-10 19:44:02,094][train_inner][INFO] - {"epoch": 1, "update": 0.974, "loss": "1.046", "ntokens": "125.115", "acc_total": "125.115", "n_correct": "105.85", "wer_total": "125.115", "n_error": "19.26", "ppl": "2.07", "accuracy": "84.602", "wer": "15.394", "wps": "68.3", "ups": "0.55", "wpb": "125.1", "bsz": "8", "num_updates": "146800", "lr": "0.000248044", "gnorm": "3.445", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "413986"} [2024-07-10 19:50:08,973][train_inner][INFO] - {"epoch": 1, "update": 0.975, "loss": "1.012", "ntokens": "127.3", "acc_total": "127.3", "n_correct": "108.235", "wer_total": "127.3", "n_error": "19.06", "ppl": "2.02", "accuracy": "85.024", "wer": "14.973", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "147000", "lr": "0.000247302", "gnorm": "3.159", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "414353"} [2024-07-10 19:56:15,669][train_inner][INFO] - {"epoch": 1, "update": 0.976, "loss": "1.087", "ntokens": "126.155", "acc_total": "126.155", "n_correct": "106.275", "wer_total": "126.155", "n_error": "19.86", "ppl": "2.12", "accuracy": "84.242", "wer": "15.743", "wps": "68.8", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "147200", "lr": "0.000246562", "gnorm": "3.34", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "414720"} [2024-07-10 20:02:21,938][train_inner][INFO] - {"epoch": 1, "update": 0.977, "loss": "1.056", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "106.425", "wer_total": "125.995", "n_error": "19.555", "ppl": "2.08", "accuracy": "84.468", "wer": "15.52", "wps": "68.8", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "147400", "lr": "0.000245825", "gnorm": "3.546", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "415086"} [2024-07-10 20:05:25,532][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 20:47:16,977][valid][INFO] - {"epoch": 1, "valid_loss": "0.91", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.5449", "valid_wer_total": "18.1585", "valid_n_error": "2.6128", "valid_ppl": "1.88", "valid_accuracy": "85.607", "valid_wer": "14.389", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "147500", "valid_best_accuracy": "86.259"} [2024-07-10 20:47:16,978][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 147500 updates [2024-07-10 20:47:16,978][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_147500.pt [2024-07-10 20:47:20,175][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_147500.pt [2024-07-10 20:47:22,245][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_147500.pt (epoch 1 @ 147500 updates, score 85.607) (writing took 5.267562343971804 seconds) [2024-07-10 20:50:25,323][train_inner][INFO] - {"epoch": 1, "update": 0.979, "loss": "1.101", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "107.315", "wer_total": "127.695", "n_error": "20.38", "ppl": "2.15", "accuracy": "84.04", "wer": "15.96", "wps": "8.9", "ups": "0.07", "wpb": "127.7", "bsz": "8", "num_updates": "147600", "lr": "0.00024509", "gnorm": "3.51", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "417969"} [2024-07-10 20:56:32,059][train_inner][INFO] - {"epoch": 1, "update": 0.98, "loss": "1.078", "ntokens": "128.64", "acc_total": "128.64", "n_correct": "108.085", "wer_total": "128.64", "n_error": "20.55", "ppl": "2.11", "accuracy": "84.021", "wer": "15.975", "wps": "70.2", "ups": "0.55", "wpb": "128.6", "bsz": "8", "num_updates": "147800", "lr": "0.000244356", "gnorm": "3.708", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "418336"} [2024-07-10 21:01:38,195][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-10 21:02:40,488][train_inner][INFO] - {"epoch": 1, "update": 0.981, "loss": "1.099", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "106.795", "wer_total": "126.905", "n_error": "20.1", "ppl": "2.14", "accuracy": "84.154", "wer": "15.839", "wps": "68.9", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "148000", "lr": "0.000243626", "gnorm": "3.357", "loss_scale": "512", "train_wall": "368", "gb_free": "6.5", "wall": "418705"} [2024-07-10 21:08:47,873][train_inner][INFO] - {"epoch": 1, "update": 0.983, "loss": "1.058", "ntokens": "128.135", "acc_total": "128.135", "n_correct": "108.285", "wer_total": "128.135", "n_error": "19.83", "ppl": "2.08", "accuracy": "84.509", "wer": "15.476", "wps": "69.8", "ups": "0.54", "wpb": "128.1", "bsz": "8", "num_updates": "148200", "lr": "0.000242897", "gnorm": "3.572", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "419072"} [2024-07-10 21:14:55,379][train_inner][INFO] - {"epoch": 1, "update": 0.984, "loss": "1.063", "ntokens": "127.24", "acc_total": "127.24", "n_correct": "107.985", "wer_total": "127.24", "n_error": "19.255", "ppl": "2.09", "accuracy": "84.867", "wer": "15.133", "wps": "69.2", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "148400", "lr": "0.00024217", "gnorm": "3.468", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "419439"} [2024-07-10 21:21:01,840][train_inner][INFO] - {"epoch": 1, "update": 0.985, "loss": "1.086", "ntokens": "126.16", "acc_total": "126.16", "n_correct": "106.175", "wer_total": "126.16", "n_error": "19.98", "ppl": "2.12", "accuracy": "84.159", "wer": "15.837", "wps": "68.9", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "148600", "lr": "0.000241446", "gnorm": "3.473", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "419806"} [2024-07-10 21:26:22,505][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-10 21:27:10,104][train_inner][INFO] - {"epoch": 1, "update": 0.987, "loss": "1.063", "ntokens": "126.455", "acc_total": "126.455", "n_correct": "107.08", "wer_total": "126.455", "n_error": "19.375", "ppl": "2.09", "accuracy": "84.678", "wer": "15.322", "wps": "68.7", "ups": "0.54", "wpb": "126.5", "bsz": "8", "num_updates": "148800", "lr": "0.000240724", "gnorm": "3.291", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "420174"} [2024-07-10 21:33:16,795][train_inner][INFO] - {"epoch": 1, "update": 0.988, "loss": "1.051", "ntokens": "126.495", "acc_total": "126.495", "n_correct": "107.005", "wer_total": "126.495", "n_error": "19.48", "ppl": "2.07", "accuracy": "84.592", "wer": "15.4", "wps": "69", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "149000", "lr": "0.000240004", "gnorm": "3.218", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "420541"} [2024-07-10 21:39:23,375][train_inner][INFO] - {"epoch": 1, "update": 0.989, "loss": "1.043", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "107.475", "wer_total": "127.075", "n_error": "19.595", "ppl": "2.06", "accuracy": "84.576", "wer": "15.42", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "149200", "lr": "0.000239286", "gnorm": "3.237", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "420907"} [2024-07-10 21:45:30,201][train_inner][INFO] - {"epoch": 1, "update": 0.991, "loss": "1.059", "ntokens": "128.575", "acc_total": "128.575", "n_correct": "108.635", "wer_total": "128.575", "n_error": "19.93", "ppl": "2.08", "accuracy": "84.492", "wer": "15.501", "wps": "70.1", "ups": "0.55", "wpb": "128.6", "bsz": "8", "num_updates": "149400", "lr": "0.00023857", "gnorm": "3.379", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "421274"} [2024-07-10 21:51:36,670][train_inner][INFO] - {"epoch": 1, "update": 0.992, "loss": "1.037", "ntokens": "126.84", "acc_total": "126.84", "n_correct": "107.685", "wer_total": "126.84", "n_error": "19.15", "ppl": "2.05", "accuracy": "84.898", "wer": "15.098", "wps": "69.2", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "149600", "lr": "0.000237856", "gnorm": "3.253", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "421641"} [2024-07-10 21:57:43,006][train_inner][INFO] - {"epoch": 1, "update": 0.993, "loss": "1.03", "ntokens": "127.53", "acc_total": "127.53", "n_correct": "108.22", "wer_total": "127.53", "n_error": "19.31", "ppl": "2.04", "accuracy": "84.858", "wer": "15.142", "wps": "69.6", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "149800", "lr": "0.000237145", "gnorm": "3.308", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "422007"} [2024-07-10 22:03:49,259][train_inner][INFO] - {"epoch": 1, "update": 0.995, "loss": "1.06", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "107.555", "wer_total": "126.705", "n_error": "19.15", "ppl": "2.08", "accuracy": "84.886", "wer": "15.114", "wps": "69.2", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "150000", "lr": "0.000236435", "gnorm": "3.375", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "422373"} [2024-07-10 22:03:49,260][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 22:45:41,394][valid][INFO] - {"epoch": 1, "valid_loss": "0.876", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.7296", "valid_wer_total": "18.1585", "valid_n_error": "2.42835", "valid_ppl": "1.83", "valid_accuracy": "86.624", "valid_wer": "13.373", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "150000", "valid_best_accuracy": "86.624"} [2024-07-10 22:45:41,395][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 150000 updates [2024-07-10 22:45:41,395][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_150000.pt [2024-07-10 22:45:44,570][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_150000.pt [2024-07-10 22:45:48,671][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_150000.pt (epoch 1 @ 150000 updates, score 86.624) (writing took 7.275759109994397 seconds) [2024-07-10 22:51:55,109][train_inner][INFO] - {"epoch": 1, "update": 0.996, "loss": "1.007", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "108.48", "wer_total": "126.805", "n_error": "18.315", "ppl": "2.01", "accuracy": "85.549", "wer": "14.443", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "150200", "lr": "0.000235728", "gnorm": "3.374", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "425259"} [2024-07-10 22:58:02,344][train_inner][INFO] - {"epoch": 1, "update": 0.997, "loss": "1.017", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "108.08", "wer_total": "127.245", "n_error": "19.155", "ppl": "2.02", "accuracy": "84.939", "wer": "15.054", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "150400", "lr": "0.000235023", "gnorm": "3.07", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "425626"} [2024-07-10 23:04:08,913][train_inner][INFO] - {"epoch": 1, "update": 0.999, "loss": "1.057", "ntokens": "126.29", "acc_total": "126.29", "n_correct": "106.715", "wer_total": "126.29", "n_error": "19.565", "ppl": "2.08", "accuracy": "84.5", "wer": "15.492", "wps": "68.9", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "150600", "lr": "0.00023432", "gnorm": "3.71", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "425993"} [2024-07-10 23:10:04,904][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-10 23:51:52,505][valid][INFO] - {"epoch": 1, "valid_loss": "0.858", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.753", "valid_wer_total": "18.1585", "valid_n_error": "2.40454", "valid_ppl": "1.81", "valid_accuracy": "86.753", "valid_wer": "13.242", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "150795", "valid_best_accuracy": "86.753"} [2024-07-10 23:51:52,506][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 150795 updates [2024-07-10 23:51:52,507][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_best.pt [2024-07-10 23:51:56,285][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_best.pt [2024-07-10 23:51:58,628][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 1 @ 150795 updates, score 86.753) (writing took 6.1219219180056825 seconds) [2024-07-10 23:51:58,629][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below) [2024-07-10 23:51:58,631][train][INFO] - {"epoch": 1, "train_loss": "1.611", "train_ntokens": "126.889", "train_acc_total": "126.889", "train_n_correct": "97.1357", "train_wer_total": "126.889", "train_n_error": "29.7198", "train_ppl": "3.06", "train_accuracy": "76.552", "train_wer": "23.422", "train_wps": "44.6", "train_ups": "0.35", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "150795", "train_lr": "0.000233637", "train_gnorm": "4.876", "train_loss_scale": "256", "train_train_wall": "274448", "train_gb_free": "6.5", "train_wall": "428863"} [2024-07-10 23:51:59,001][fairseq.trainer][INFO] - begin training epoch 2 [2024-07-10 23:51:59,002][fairseq_cli.train][INFO] - Start iterating over samples [2024-07-10 23:52:08,149][train_inner][INFO] - {"epoch": 2, "update": 1.0, "loss": "1.031", "ntokens": "126.54", "acc_total": "126.54", "n_correct": "107.235", "wer_total": "126.54", "n_error": "19.3", "ppl": "2.04", "accuracy": "84.744", "wer": "15.252", "wps": "8.8", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "150800", "lr": "0.000233619", "gnorm": "3.239", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "428872"} [2024-07-10 23:58:13,110][train_inner][INFO] - {"epoch": 2, "update": 1.001, "loss": "0.996", "ntokens": "128.095", "acc_total": "128.095", "n_correct": "109.27", "wer_total": "128.095", "n_error": "18.825", "ppl": "1.99", "accuracy": "85.304", "wer": "14.696", "wps": "70.2", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "151000", "lr": "0.00023292", "gnorm": "3.342", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "429237"} [2024-07-11 00:04:17,933][train_inner][INFO] - {"epoch": 2, "update": 1.003, "loss": "1.03", "ntokens": "126.67", "acc_total": "126.67", "n_correct": "107.615", "wer_total": "126.67", "n_error": "19.055", "ppl": "2.04", "accuracy": "84.957", "wer": "15.043", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "151200", "lr": "0.000232224", "gnorm": "3.205", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "429602"} [2024-07-11 00:06:18,389][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-11 00:10:24,678][train_inner][INFO] - {"epoch": 2, "update": 1.004, "loss": "0.951", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "108.94", "wer_total": "127.345", "n_error": "18.4", "ppl": "1.93", "accuracy": "85.547", "wer": "14.449", "wps": "69.4", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "151400", "lr": "0.000231529", "gnorm": "3.188", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "429969"} [2024-07-11 00:16:29,547][train_inner][INFO] - {"epoch": 2, "update": 1.005, "loss": "0.97", "ntokens": "127.76", "acc_total": "127.76", "n_correct": "109.39", "wer_total": "127.76", "n_error": "18.355", "ppl": "1.96", "accuracy": "85.621", "wer": "14.367", "wps": "70", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "151600", "lr": "0.000230836", "gnorm": "3.211", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "430334"} [2024-07-11 00:22:34,541][train_inner][INFO] - {"epoch": 2, "update": 1.007, "loss": "0.976", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "109.38", "wer_total": "127.35", "n_error": "17.955", "ppl": "1.97", "accuracy": "85.889", "wer": "14.099", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "151800", "lr": "0.000230146", "gnorm": "2.986", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "430699"} [2024-07-11 00:28:39,340][train_inner][INFO] - {"epoch": 2, "update": 1.008, "loss": "0.995", "ntokens": "125.98", "acc_total": "125.98", "n_correct": "107.395", "wer_total": "125.98", "n_error": "18.57", "ppl": "1.99", "accuracy": "85.248", "wer": "14.74", "wps": "69.1", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "152000", "lr": "0.000229457", "gnorm": "3.135", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "431063"} [2024-07-11 00:34:44,291][train_inner][INFO] - {"epoch": 2, "update": 1.009, "loss": "1.039", "ntokens": "127.18", "acc_total": "127.18", "n_correct": "108.22", "wer_total": "127.18", "n_error": "18.96", "ppl": "2.05", "accuracy": "85.092", "wer": "14.908", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "152200", "lr": "0.000228771", "gnorm": "3.164", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "431428"} [2024-07-11 00:40:49,345][train_inner][INFO] - {"epoch": 2, "update": 1.011, "loss": "1.008", "ntokens": "126.09", "acc_total": "126.09", "n_correct": "107.475", "wer_total": "126.09", "n_error": "18.605", "ppl": "2.01", "accuracy": "85.237", "wer": "14.755", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "152400", "lr": "0.000228087", "gnorm": "3.531", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "431793"} [2024-07-11 00:43:51,973][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 01:25:42,408][valid][INFO] - {"epoch": 2, "valid_loss": "0.861", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.7805", "valid_wer_total": "18.1585", "valid_n_error": "2.37745", "valid_ppl": "1.82", "valid_accuracy": "86.904", "valid_wer": "13.093", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "152500", "valid_best_accuracy": "86.904"} [2024-07-11 01:25:42,408][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 152500 updates [2024-07-11 01:25:42,409][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_152500.pt [2024-07-11 01:25:45,609][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_152500.pt [2024-07-11 01:25:49,952][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_152500.pt (epoch 2 @ 152500 updates, score 86.904) (writing took 7.543481839005835 seconds) [2024-07-11 01:28:52,213][train_inner][INFO] - {"epoch": 2, "update": 1.012, "loss": "0.983", "ntokens": "127.29", "acc_total": "127.29", "n_correct": "108.86", "wer_total": "127.29", "n_error": "18.43", "ppl": "1.98", "accuracy": "85.521", "wer": "14.479", "wps": "8.8", "ups": "0.07", "wpb": "127.3", "bsz": "8", "num_updates": "152600", "lr": "0.000227405", "gnorm": "3.071", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "434676"} [2024-07-11 01:34:57,246][train_inner][INFO] - {"epoch": 2, "update": 1.013, "loss": "0.968", "ntokens": "126.65", "acc_total": "126.65", "n_correct": "108.555", "wer_total": "126.65", "n_error": "18.09", "ppl": "1.96", "accuracy": "85.713", "wer": "14.283", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "152800", "lr": "0.000226724", "gnorm": "3.135", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "435041"} [2024-07-11 01:41:02,309][train_inner][INFO] - {"epoch": 2, "update": 1.015, "loss": "1.01", "ntokens": "127.58", "acc_total": "127.58", "n_correct": "108.635", "wer_total": "127.58", "n_error": "18.94", "ppl": "2.01", "accuracy": "85.15", "wer": "14.846", "wps": "69.9", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "153000", "lr": "0.000226046", "gnorm": "3.59", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "435406"} [2024-07-11 01:47:07,127][train_inner][INFO] - {"epoch": 2, "update": 1.016, "loss": "0.928", "ntokens": "126.105", "acc_total": "126.105", "n_correct": "108.905", "wer_total": "126.105", "n_error": "17.195", "ppl": "1.9", "accuracy": "86.361", "wer": "13.635", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "153200", "lr": "0.00022537", "gnorm": "3.039", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "435771"} [2024-07-11 01:53:12,196][train_inner][INFO] - {"epoch": 2, "update": 1.017, "loss": "0.964", "ntokens": "127.445", "acc_total": "127.445", "n_correct": "109.245", "wer_total": "127.445", "n_error": "18.19", "ppl": "1.95", "accuracy": "85.719", "wer": "14.273", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "153400", "lr": "0.000224696", "gnorm": "3.193", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "436136"} [2024-07-11 01:59:16,934][train_inner][INFO] - {"epoch": 2, "update": 1.019, "loss": "0.975", "ntokens": "125.89", "acc_total": "125.89", "n_correct": "107.195", "wer_total": "125.89", "n_error": "18.695", "ppl": "1.97", "accuracy": "85.15", "wer": "14.85", "wps": "69", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "153600", "lr": "0.000224024", "gnorm": "3.692", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "436501"} [2024-07-11 02:05:21,751][train_inner][INFO] - {"epoch": 2, "update": 1.02, "loss": "0.953", "ntokens": "127.01", "acc_total": "127.01", "n_correct": "109.105", "wer_total": "127.01", "n_error": "17.895", "ppl": "1.94", "accuracy": "85.903", "wer": "14.089", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "153800", "lr": "0.000223354", "gnorm": "3.237", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "436866"} [2024-07-11 02:11:26,592][train_inner][INFO] - {"epoch": 2, "update": 1.021, "loss": "0.945", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "108.665", "wer_total": "126.735", "n_error": "18.07", "ppl": "1.93", "accuracy": "85.742", "wer": "14.258", "wps": "69.5", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "154000", "lr": "0.000222685", "gnorm": "3.221", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "437231"} [2024-07-11 02:17:31,431][train_inner][INFO] - {"epoch": 2, "update": 1.023, "loss": "0.984", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "108.18", "wer_total": "126.86", "n_error": "18.67", "ppl": "1.98", "accuracy": "85.275", "wer": "14.717", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "154200", "lr": "0.000222019", "gnorm": "2.992", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "437595"} [2024-07-11 02:23:36,379][train_inner][INFO] - {"epoch": 2, "update": 1.024, "loss": "0.986", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "108.87", "wer_total": "127.345", "n_error": "18.47", "ppl": "1.98", "accuracy": "85.492", "wer": "14.504", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "154400", "lr": "0.000221355", "gnorm": "3.188", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "437960"} [2024-07-11 02:29:41,124][train_inner][INFO] - {"epoch": 2, "update": 1.025, "loss": "0.939", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "109.225", "wer_total": "127.06", "n_error": "17.825", "ppl": "1.92", "accuracy": "85.963", "wer": "14.029", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "154600", "lr": "0.000220693", "gnorm": "3.292", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "438325"} [2024-07-11 02:35:46,096][train_inner][INFO] - {"epoch": 2, "update": 1.027, "loss": "0.996", "ntokens": "127.045", "acc_total": "127.045", "n_correct": "108.45", "wer_total": "127.045", "n_error": "18.59", "ppl": "2", "accuracy": "85.363", "wer": "14.633", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "154800", "lr": "0.000220033", "gnorm": "3.257", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "438690"} [2024-07-11 02:41:50,929][train_inner][INFO] - {"epoch": 2, "update": 1.028, "loss": "1.01", "ntokens": "127.68", "acc_total": "127.68", "n_correct": "108.63", "wer_total": "127.68", "n_error": "19.05", "ppl": "2.01", "accuracy": "85.08", "wer": "14.92", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "155000", "lr": "0.000219375", "gnorm": "3.33", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "439055"} [2024-07-11 02:41:50,930][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 03:23:40,896][valid][INFO] - {"epoch": 2, "valid_loss": "0.856", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.7619", "valid_wer_total": "18.1585", "valid_n_error": "2.39575", "valid_ppl": "1.81", "valid_accuracy": "86.802", "valid_wer": "13.194", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "155000", "valid_best_accuracy": "86.904"} [2024-07-11 03:23:40,896][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 155000 updates [2024-07-11 03:23:40,896][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_155000.pt [2024-07-11 03:23:44,086][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_155000.pt [2024-07-11 03:23:46,466][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_155000.pt (epoch 2 @ 155000 updates, score 86.802) (writing took 5.569720147992484 seconds) [2024-07-11 03:29:51,077][train_inner][INFO] - {"epoch": 2, "update": 1.029, "loss": "0.981", "ntokens": "125.195", "acc_total": "125.195", "n_correct": "107.12", "wer_total": "125.195", "n_error": "18.07", "ppl": "1.97", "accuracy": "85.563", "wer": "14.433", "wps": "8.7", "ups": "0.07", "wpb": "125.2", "bsz": "8", "num_updates": "155200", "lr": "0.000218719", "gnorm": "3.085", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "441935"} [2024-07-11 03:35:56,134][train_inner][INFO] - {"epoch": 2, "update": 1.031, "loss": "0.909", "ntokens": "128.045", "acc_total": "128.045", "n_correct": "110.615", "wer_total": "128.045", "n_error": "17.425", "ppl": "1.88", "accuracy": "86.388", "wer": "13.608", "wps": "70.2", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "155400", "lr": "0.000218064", "gnorm": "3.024", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "442300"} [2024-07-11 03:42:01,536][train_inner][INFO] - {"epoch": 2, "update": 1.032, "loss": "0.972", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "108.24", "wer_total": "126.68", "n_error": "18.42", "ppl": "1.96", "accuracy": "85.444", "wer": "14.541", "wps": "69.3", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "155600", "lr": "0.000217412", "gnorm": "3.253", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "442666"} [2024-07-11 03:48:06,444][train_inner][INFO] - {"epoch": 2, "update": 1.033, "loss": "0.976", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "109.525", "wer_total": "127.695", "n_error": "18.16", "ppl": "1.97", "accuracy": "85.771", "wer": "14.221", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "155800", "lr": "0.000216762", "gnorm": "3.056", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "443030"} [2024-07-11 03:54:11,334][train_inner][INFO] - {"epoch": 2, "update": 1.035, "loss": "0.994", "ntokens": "126.275", "acc_total": "126.275", "n_correct": "107.815", "wer_total": "126.275", "n_error": "18.455", "ppl": "1.99", "accuracy": "85.381", "wer": "14.615", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "156000", "lr": "0.000216113", "gnorm": "3.344", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "443395"} [2024-07-11 04:00:16,611][train_inner][INFO] - {"epoch": 2, "update": 1.036, "loss": "0.981", "ntokens": "127.215", "acc_total": "127.215", "n_correct": "108.92", "wer_total": "127.215", "n_error": "18.28", "ppl": "1.97", "accuracy": "85.619", "wer": "14.369", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "156200", "lr": "0.000215467", "gnorm": "3.013", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "443761"} [2024-07-11 04:06:21,847][train_inner][INFO] - {"epoch": 2, "update": 1.037, "loss": "0.983", "ntokens": "126.64", "acc_total": "126.64", "n_correct": "108.055", "wer_total": "126.64", "n_error": "18.585", "ppl": "1.98", "accuracy": "85.325", "wer": "14.675", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "156400", "lr": "0.000214822", "gnorm": "3.035", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "444126"} [2024-07-11 04:12:26,676][train_inner][INFO] - {"epoch": 2, "update": 1.038, "loss": "0.965", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "107.86", "wer_total": "126.01", "n_error": "18.145", "ppl": "1.95", "accuracy": "85.596", "wer": "14.4", "wps": "69.1", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "156600", "lr": "0.00021418", "gnorm": "2.988", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "444491"} [2024-07-11 04:18:31,762][train_inner][INFO] - {"epoch": 2, "update": 1.04, "loss": "0.989", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "108.14", "wer_total": "126.975", "n_error": "18.825", "ppl": "1.99", "accuracy": "85.166", "wer": "14.826", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "156800", "lr": "0.000213539", "gnorm": "3.133", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "444856"} [2024-07-11 04:21:37,972][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-11 04:24:38,493][train_inner][INFO] - {"epoch": 2, "update": 1.041, "loss": "0.977", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "108.395", "wer_total": "126.875", "n_error": "18.465", "ppl": "1.97", "accuracy": "85.434", "wer": "14.554", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "157000", "lr": "0.0002129", "gnorm": "2.965", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "445223"} [2024-07-11 04:30:43,355][train_inner][INFO] - {"epoch": 2, "update": 1.042, "loss": "0.972", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "107.535", "wer_total": "126.18", "n_error": "18.64", "ppl": "1.96", "accuracy": "85.223", "wer": "14.773", "wps": "69.2", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "157200", "lr": "0.000212264", "gnorm": "3.189", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "445587"} [2024-07-11 04:36:48,320][train_inner][INFO] - {"epoch": 2, "update": 1.044, "loss": "0.969", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "108.89", "wer_total": "126.875", "n_error": "17.98", "ppl": "1.96", "accuracy": "85.825", "wer": "14.171", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "157400", "lr": "0.000211629", "gnorm": "2.843", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "445952"} [2024-07-11 04:39:50,853][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 05:21:40,196][valid][INFO] - {"epoch": 2, "valid_loss": "0.838", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.7736", "valid_wer_total": "18.1585", "valid_n_error": "2.38412", "valid_ppl": "1.79", "valid_accuracy": "86.866", "valid_wer": "13.13", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "157500", "valid_best_accuracy": "86.904"} [2024-07-11 05:21:40,197][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 157500 updates [2024-07-11 05:21:40,197][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_157500.pt [2024-07-11 05:21:43,367][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_157500.pt [2024-07-11 05:21:45,698][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_157500.pt (epoch 2 @ 157500 updates, score 86.866) (writing took 5.500662614009343 seconds) [2024-07-11 05:24:48,121][train_inner][INFO] - {"epoch": 2, "update": 1.045, "loss": "1.024", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "108.06", "wer_total": "127.385", "n_error": "19.325", "ppl": "2.03", "accuracy": "84.829", "wer": "15.171", "wps": "8.8", "ups": "0.07", "wpb": "127.4", "bsz": "8", "num_updates": "157600", "lr": "0.000210996", "gnorm": "3.281", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "448832"} [2024-07-11 05:30:52,658][train_inner][INFO] - {"epoch": 2, "update": 1.046, "loss": "1.001", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "107.765", "wer_total": "126.265", "n_error": "18.495", "ppl": "2", "accuracy": "85.348", "wer": "14.648", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "157800", "lr": "0.000210364", "gnorm": "2.913", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "449197"} [2024-07-11 05:36:57,035][train_inner][INFO] - {"epoch": 2, "update": 1.048, "loss": "0.987", "ntokens": "127.67", "acc_total": "127.67", "n_correct": "109.34", "wer_total": "127.67", "n_error": "18.33", "ppl": "1.98", "accuracy": "85.643", "wer": "14.357", "wps": "70.1", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "158000", "lr": "0.000209735", "gnorm": "2.985", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "449561"} [2024-07-11 05:43:01,402][train_inner][INFO] - {"epoch": 2, "update": 1.049, "loss": "0.954", "ntokens": "127.085", "acc_total": "127.085", "n_correct": "109.16", "wer_total": "127.085", "n_error": "17.92", "ppl": "1.94", "accuracy": "85.895", "wer": "14.101", "wps": "69.8", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "158200", "lr": "0.000209108", "gnorm": "2.984", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "449925"} [2024-07-11 05:49:05,858][train_inner][INFO] - {"epoch": 2, "update": 1.05, "loss": "0.958", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "108.765", "wer_total": "126.78", "n_error": "18.005", "ppl": "1.94", "accuracy": "85.79", "wer": "14.202", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "158400", "lr": "0.000208482", "gnorm": "3.063", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "450290"} [2024-07-11 05:55:10,175][train_inner][INFO] - {"epoch": 2, "update": 1.052, "loss": "0.976", "ntokens": "126.145", "acc_total": "126.145", "n_correct": "108.05", "wer_total": "126.145", "n_error": "18.08", "ppl": "1.97", "accuracy": "85.655", "wer": "14.333", "wps": "69.3", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "158600", "lr": "0.000207859", "gnorm": "3.213", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "450654"} [2024-07-11 06:01:14,242][train_inner][INFO] - {"epoch": 2, "update": 1.053, "loss": "0.974", "ntokens": "126.14", "acc_total": "126.14", "n_correct": "107.935", "wer_total": "126.14", "n_error": "18.205", "ppl": "1.96", "accuracy": "85.568", "wer": "14.432", "wps": "69.3", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "158800", "lr": "0.000207237", "gnorm": "3.256", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "451018"} [2024-07-11 06:07:18,435][train_inner][INFO] - {"epoch": 2, "update": 1.054, "loss": "0.951", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "108.52", "wer_total": "126.56", "n_error": "18.025", "ppl": "1.93", "accuracy": "85.746", "wer": "14.242", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "159000", "lr": "0.000206617", "gnorm": "2.928", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "451382"} [2024-07-11 06:13:22,614][train_inner][INFO] - {"epoch": 2, "update": 1.056, "loss": "0.973", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "108.19", "wer_total": "126.795", "n_error": "18.59", "ppl": "1.96", "accuracy": "85.327", "wer": "14.661", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "159200", "lr": "0.000205999", "gnorm": "2.968", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "451747"} [2024-07-11 06:19:26,777][train_inner][INFO] - {"epoch": 2, "update": 1.057, "loss": "0.976", "ntokens": "126.235", "acc_total": "126.235", "n_correct": "108.29", "wer_total": "126.235", "n_error": "17.945", "ppl": "1.97", "accuracy": "85.784", "wer": "14.216", "wps": "69.3", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "159400", "lr": "0.000205383", "gnorm": "3.033", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "452111"} [2024-07-11 06:25:31,175][train_inner][INFO] - {"epoch": 2, "update": 1.058, "loss": "0.967", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "108.72", "wer_total": "126.59", "n_error": "17.87", "ppl": "1.95", "accuracy": "85.884", "wer": "14.116", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "159600", "lr": "0.000204768", "gnorm": "3.193", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "452475"} [2024-07-11 06:31:35,361][train_inner][INFO] - {"epoch": 2, "update": 1.06, "loss": "0.961", "ntokens": "127.105", "acc_total": "127.105", "n_correct": "108.855", "wer_total": "127.105", "n_error": "18.235", "ppl": "1.95", "accuracy": "85.642", "wer": "14.346", "wps": "69.8", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "159800", "lr": "0.000204156", "gnorm": "2.935", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "452839"} [2024-07-11 06:37:39,852][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "0.973", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "108.53", "wer_total": "126.97", "n_error": "18.435", "ppl": "1.96", "accuracy": "85.477", "wer": "14.519", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "160000", "lr": "0.000203545", "gnorm": "3.095", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "453204"} [2024-07-11 06:37:39,852][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 07:19:22,493][valid][INFO] - {"epoch": 2, "valid_loss": "0.823", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.7755", "valid_wer_total": "18.1585", "valid_n_error": "2.38212", "valid_ppl": "1.77", "valid_accuracy": "86.877", "valid_wer": "13.118", "valid_wps": "174.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "160000", "valid_best_accuracy": "86.904"} [2024-07-11 07:19:22,494][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 160000 updates [2024-07-11 07:19:22,494][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_160000.pt [2024-07-11 07:19:25,679][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_160000.pt [2024-07-11 07:19:28,012][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_160000.pt (epoch 2 @ 160000 updates, score 86.877) (writing took 5.518252587993629 seconds) [2024-07-11 07:25:32,030][train_inner][INFO] - {"epoch": 2, "update": 1.062, "loss": "0.924", "ntokens": "126.025", "acc_total": "126.025", "n_correct": "108.515", "wer_total": "126.025", "n_error": "17.5", "ppl": "1.9", "accuracy": "86.106", "wer": "13.886", "wps": "8.8", "ups": "0.07", "wpb": "126", "bsz": "8", "num_updates": "160200", "lr": "0.000202936", "gnorm": "2.966", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "456076"} [2024-07-11 07:31:36,562][train_inner][INFO] - {"epoch": 2, "update": 1.064, "loss": "0.995", "ntokens": "126.39", "acc_total": "126.39", "n_correct": "107.765", "wer_total": "126.39", "n_error": "18.62", "ppl": "1.99", "accuracy": "85.264", "wer": "14.732", "wps": "69.3", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "160400", "lr": "0.000202329", "gnorm": "3.04", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "456441"} [2024-07-11 07:34:11,122][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-11 07:37:42,338][train_inner][INFO] - {"epoch": 2, "update": 1.065, "loss": "0.931", "ntokens": "126.755", "acc_total": "126.755", "n_correct": "109.135", "wer_total": "126.755", "n_error": "17.615", "ppl": "1.91", "accuracy": "86.099", "wer": "13.897", "wps": "69.3", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "160600", "lr": "0.000201724", "gnorm": "2.835", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "456806"} [2024-07-11 07:43:46,526][train_inner][INFO] - {"epoch": 2, "update": 1.066, "loss": "0.959", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "109.38", "wer_total": "127.49", "n_error": "18.11", "ppl": "1.94", "accuracy": "85.795", "wer": "14.205", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "160800", "lr": "0.000201121", "gnorm": "3.122", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "457171"} [2024-07-11 07:49:50,714][train_inner][INFO] - {"epoch": 2, "update": 1.068, "loss": "0.943", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "109.145", "wer_total": "127.1", "n_error": "17.95", "ppl": "1.92", "accuracy": "85.873", "wer": "14.123", "wps": "69.8", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "161000", "lr": "0.000200519", "gnorm": "3.137", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "457535"} [2024-07-11 07:55:54,988][train_inner][INFO] - {"epoch": 2, "update": 1.069, "loss": "0.912", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "109.27", "wer_total": "126.855", "n_error": "17.585", "ppl": "1.88", "accuracy": "86.138", "wer": "13.862", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "161200", "lr": "0.000199919", "gnorm": "2.972", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "457899"} [2024-07-11 08:01:59,170][train_inner][INFO] - {"epoch": 2, "update": 1.07, "loss": "1.007", "ntokens": "127.53", "acc_total": "127.53", "n_correct": "108.635", "wer_total": "127.53", "n_error": "18.89", "ppl": "2.01", "accuracy": "85.184", "wer": "14.812", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "161400", "lr": "0.000199321", "gnorm": "3.151", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "458263"} [2024-07-11 08:08:03,717][train_inner][INFO] - {"epoch": 2, "update": 1.072, "loss": "0.942", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "109.26", "wer_total": "126.9", "n_error": "17.635", "ppl": "1.92", "accuracy": "86.099", "wer": "13.897", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "161600", "lr": "0.000198725", "gnorm": "3.014", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "458628"} [2024-07-11 08:14:08,091][train_inner][INFO] - {"epoch": 2, "update": 1.073, "loss": "0.933", "ntokens": "126.385", "acc_total": "126.385", "n_correct": "108.785", "wer_total": "126.385", "n_error": "17.585", "ppl": "1.91", "accuracy": "86.074", "wer": "13.914", "wps": "69.4", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "161800", "lr": "0.000198131", "gnorm": "3.011", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "458992"} [2024-07-11 08:20:12,414][train_inner][INFO] - {"epoch": 2, "update": 1.074, "loss": "0.956", "ntokens": "126.16", "acc_total": "126.16", "n_correct": "108.35", "wer_total": "126.16", "n_error": "17.81", "ppl": "1.94", "accuracy": "85.883", "wer": "14.117", "wps": "69.3", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "162000", "lr": "0.000197538", "gnorm": "3.282", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "459356"} [2024-07-11 08:26:16,930][train_inner][INFO] - {"epoch": 2, "update": 1.076, "loss": "0.952", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "108.76", "wer_total": "126.58", "n_error": "17.805", "ppl": "1.93", "accuracy": "85.922", "wer": "14.066", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "162200", "lr": "0.000196947", "gnorm": "2.918", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "459721"} [2024-07-11 08:32:21,209][train_inner][INFO] - {"epoch": 2, "update": 1.077, "loss": "0.967", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "109.035", "wer_total": "127.385", "n_error": "18.33", "ppl": "1.95", "accuracy": "85.595", "wer": "14.389", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "162400", "lr": "0.000196358", "gnorm": "2.785", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "460085"} [2024-07-11 08:35:23,466][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 09:17:10,114][valid][INFO] - {"epoch": 2, "valid_loss": "0.823", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.8132", "valid_wer_total": "18.1585", "valid_n_error": "2.34473", "valid_ppl": "1.77", "valid_accuracy": "87.084", "valid_wer": "12.913", "valid_wps": "173.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "162500", "valid_best_accuracy": "87.084"} [2024-07-11 09:17:10,114][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 162500 updates [2024-07-11 09:17:10,115][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_162500.pt [2024-07-11 09:17:13,315][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_162500.pt [2024-07-11 09:17:17,591][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_162500.pt (epoch 2 @ 162500 updates, score 87.084) (writing took 7.4768850930267945 seconds) [2024-07-11 09:19:41,290][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-11 09:20:21,409][train_inner][INFO] - {"epoch": 2, "update": 1.078, "loss": "0.939", "ntokens": "126.745", "acc_total": "126.745", "n_correct": "108.89", "wer_total": "126.745", "n_error": "17.85", "ppl": "1.92", "accuracy": "85.913", "wer": "14.083", "wps": "8.8", "ups": "0.07", "wpb": "126.7", "bsz": "8", "num_updates": "162600", "lr": "0.000195771", "gnorm": "3.001", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "462965"} [2024-07-11 09:26:26,349][train_inner][INFO] - {"epoch": 2, "update": 1.08, "loss": "0.967", "ntokens": "126.16", "acc_total": "126.16", "n_correct": "108.185", "wer_total": "126.16", "n_error": "17.965", "ppl": "1.96", "accuracy": "85.752", "wer": "14.24", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "162800", "lr": "0.000195185", "gnorm": "3.079", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "463330"} [2024-07-11 09:27:28,370][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-11 09:32:32,635][train_inner][INFO] - {"epoch": 2, "update": 1.081, "loss": "0.924", "ntokens": "126.89", "acc_total": "126.89", "n_correct": "109.065", "wer_total": "126.89", "n_error": "17.82", "ppl": "1.9", "accuracy": "85.952", "wer": "14.044", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "163000", "lr": "0.000194601", "gnorm": "2.92", "loss_scale": "256", "train_wall": "366", "gb_free": "6.5", "wall": "463697"} [2024-07-11 09:38:37,138][train_inner][INFO] - {"epoch": 2, "update": 1.082, "loss": "0.964", "ntokens": "126.04", "acc_total": "126.04", "n_correct": "108.465", "wer_total": "126.04", "n_error": "17.57", "ppl": "1.95", "accuracy": "86.056", "wer": "13.94", "wps": "69.2", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "163200", "lr": "0.000194019", "gnorm": "2.938", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "464061"} [2024-07-11 09:44:41,993][train_inner][INFO] - {"epoch": 2, "update": 1.084, "loss": "0.97", "ntokens": "127.455", "acc_total": "127.455", "n_correct": "108.88", "wer_total": "127.455", "n_error": "18.57", "ppl": "1.96", "accuracy": "85.426", "wer": "14.57", "wps": "69.9", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "163400", "lr": "0.000193439", "gnorm": "2.932", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "464426"} [2024-07-11 09:50:46,945][train_inner][INFO] - {"epoch": 2, "update": 1.085, "loss": "0.947", "ntokens": "126.145", "acc_total": "126.145", "n_correct": "108.325", "wer_total": "126.145", "n_error": "17.805", "ppl": "1.93", "accuracy": "85.873", "wer": "14.115", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "163600", "lr": "0.00019286", "gnorm": "3.545", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "464791"} [2024-07-11 09:56:51,966][train_inner][INFO] - {"epoch": 2, "update": 1.086, "loss": "0.946", "ntokens": "126.925", "acc_total": "126.925", "n_correct": "109.18", "wer_total": "126.925", "n_error": "17.74", "ppl": "1.93", "accuracy": "86.019", "wer": "13.977", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "163800", "lr": "0.000192283", "gnorm": "2.931", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "465156"} [2024-07-11 10:02:57,313][train_inner][INFO] - {"epoch": 2, "update": 1.088, "loss": "0.943", "ntokens": "127.735", "acc_total": "127.735", "n_correct": "109.7", "wer_total": "127.735", "n_error": "18.03", "ppl": "1.92", "accuracy": "85.881", "wer": "14.115", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "164000", "lr": "0.000191708", "gnorm": "2.821", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "465521"} [2024-07-11 10:09:02,448][train_inner][INFO] - {"epoch": 2, "update": 1.089, "loss": "0.965", "ntokens": "125.105", "acc_total": "125.105", "n_correct": "107.41", "wer_total": "125.105", "n_error": "17.69", "ppl": "1.95", "accuracy": "85.856", "wer": "14.14", "wps": "68.5", "ups": "0.55", "wpb": "125.1", "bsz": "8", "num_updates": "164200", "lr": "0.000191135", "gnorm": "3.197", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "465887"} [2024-07-11 10:15:07,638][train_inner][INFO] - {"epoch": 2, "update": 1.09, "loss": "0.926", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "109.755", "wer_total": "127.025", "n_error": "17.265", "ppl": "1.9", "accuracy": "86.404", "wer": "13.592", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "164400", "lr": "0.000190563", "gnorm": "2.768", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "466252"} [2024-07-11 10:21:12,977][train_inner][INFO] - {"epoch": 2, "update": 1.092, "loss": "0.965", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "109.05", "wer_total": "126.835", "n_error": "17.775", "ppl": "1.95", "accuracy": "85.978", "wer": "14.014", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "164600", "lr": "0.000189993", "gnorm": "2.985", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "466617"} [2024-07-11 10:27:18,526][train_inner][INFO] - {"epoch": 2, "update": 1.093, "loss": "0.923", "ntokens": "128.1", "acc_total": "128.1", "n_correct": "110.13", "wer_total": "128.1", "n_error": "17.965", "ppl": "1.9", "accuracy": "85.972", "wer": "14.024", "wps": "70.1", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "164800", "lr": "0.000189425", "gnorm": "2.758", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "466983"} [2024-07-11 10:33:23,916][train_inner][INFO] - {"epoch": 2, "update": 1.094, "loss": "0.936", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "108.875", "wer_total": "126.405", "n_error": "17.53", "ppl": "1.91", "accuracy": "86.132", "wer": "13.868", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "165000", "lr": "0.000188858", "gnorm": "2.735", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "467348"} [2024-07-11 10:33:23,916][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 11:15:14,647][valid][INFO] - {"epoch": 2, "valid_loss": "0.82", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.8284", "valid_wer_total": "18.1585", "valid_n_error": "2.32976", "valid_ppl": "1.77", "valid_accuracy": "87.168", "valid_wer": "12.83", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "165000", "valid_best_accuracy": "87.168"} [2024-07-11 11:15:14,648][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 165000 updates [2024-07-11 11:15:14,648][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_165000.pt [2024-07-11 11:15:17,826][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_165000.pt [2024-07-11 11:15:22,019][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_165000.pt (epoch 2 @ 165000 updates, score 87.168) (writing took 7.370717352023348 seconds) [2024-07-11 11:21:26,345][train_inner][INFO] - {"epoch": 2, "update": 1.096, "loss": "0.893", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "109.655", "wer_total": "126.92", "n_error": "17.245", "ppl": "1.86", "accuracy": "86.397", "wer": "13.587", "wps": "8.8", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "165200", "lr": "0.000188293", "gnorm": "2.562", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "470230"} [2024-07-11 11:27:30,509][train_inner][INFO] - {"epoch": 2, "update": 1.097, "loss": "0.915", "ntokens": "126.885", "acc_total": "126.885", "n_correct": "109.33", "wer_total": "126.885", "n_error": "17.555", "ppl": "1.89", "accuracy": "86.165", "wer": "13.835", "wps": "69.7", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "165400", "lr": "0.00018773", "gnorm": "2.946", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "470595"} [2024-07-11 11:33:34,247][train_inner][INFO] - {"epoch": 2, "update": 1.098, "loss": "0.912", "ntokens": "127.66", "acc_total": "127.66", "n_correct": "110.325", "wer_total": "127.66", "n_error": "17.325", "ppl": "1.88", "accuracy": "86.421", "wer": "13.571", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "165600", "lr": "0.000187168", "gnorm": "2.675", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "470958"} [2024-07-11 11:39:37,643][train_inner][INFO] - {"epoch": 2, "update": 1.1, "loss": "0.944", "ntokens": "126.13", "acc_total": "126.13", "n_correct": "108.42", "wer_total": "126.13", "n_error": "17.7", "ppl": "1.92", "accuracy": "85.959", "wer": "14.033", "wps": "69.4", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "165800", "lr": "0.000186608", "gnorm": "2.997", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "471322"} [2024-07-11 11:45:41,220][train_inner][INFO] - {"epoch": 2, "update": 1.101, "loss": "0.977", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "107.95", "wer_total": "126.405", "n_error": "18.445", "ppl": "1.97", "accuracy": "85.4", "wer": "14.592", "wps": "69.5", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "166000", "lr": "0.00018605", "gnorm": "3.018", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "471685"} [2024-07-11 11:51:44,818][train_inner][INFO] - {"epoch": 2, "update": 1.102, "loss": "0.96", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "108.315", "wer_total": "126.265", "n_error": "17.945", "ppl": "1.94", "accuracy": "85.784", "wer": "14.212", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "166200", "lr": "0.000185494", "gnorm": "2.888", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "472049"} [2024-07-11 11:57:48,475][train_inner][INFO] - {"epoch": 2, "update": 1.103, "loss": "0.892", "ntokens": "125.48", "acc_total": "125.48", "n_correct": "108.75", "wer_total": "125.48", "n_error": "16.73", "ppl": "1.86", "accuracy": "86.667", "wer": "13.333", "wps": "69", "ups": "0.55", "wpb": "125.5", "bsz": "8", "num_updates": "166400", "lr": "0.000184939", "gnorm": "2.774", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "472413"} [2024-07-11 12:03:52,382][train_inner][INFO] - {"epoch": 2, "update": 1.105, "loss": "0.984", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "108.77", "wer_total": "126.805", "n_error": "18.035", "ppl": "1.98", "accuracy": "85.777", "wer": "14.223", "wps": "69.7", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "166600", "lr": "0.000184386", "gnorm": "2.868", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "472776"} [2024-07-11 12:09:56,156][train_inner][INFO] - {"epoch": 2, "update": 1.106, "loss": "0.897", "ntokens": "126.34", "acc_total": "126.34", "n_correct": "109.235", "wer_total": "126.34", "n_error": "17.095", "ppl": "1.86", "accuracy": "86.461", "wer": "13.531", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "166800", "lr": "0.000183834", "gnorm": "2.633", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "473140"} [2024-07-11 12:15:59,864][train_inner][INFO] - {"epoch": 2, "update": 1.107, "loss": "0.931", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "108.99", "wer_total": "126.58", "n_error": "17.58", "ppl": "1.91", "accuracy": "86.104", "wer": "13.888", "wps": "69.6", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "167000", "lr": "0.000183284", "gnorm": "2.772", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "473504"} [2024-07-11 12:22:03,885][train_inner][INFO] - {"epoch": 2, "update": 1.109, "loss": "0.959", "ntokens": "125.895", "acc_total": "125.895", "n_correct": "108.085", "wer_total": "125.895", "n_error": "17.805", "ppl": "1.94", "accuracy": "85.853", "wer": "14.143", "wps": "69.2", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "167200", "lr": "0.000182736", "gnorm": "2.94", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "473868"} [2024-07-11 12:28:08,195][train_inner][INFO] - {"epoch": 2, "update": 1.11, "loss": "0.946", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "109.685", "wer_total": "127.615", "n_error": "17.93", "ppl": "1.93", "accuracy": "85.95", "wer": "14.05", "wps": "70.1", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "167400", "lr": "0.000182189", "gnorm": "2.798", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "474232"} [2024-07-11 12:31:10,370][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 13:12:52,370][valid][INFO] - {"epoch": 2, "valid_loss": "0.8", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.893", "valid_wer_total": "18.1585", "valid_n_error": "2.26507", "valid_ppl": "1.74", "valid_accuracy": "87.524", "valid_wer": "12.474", "valid_wps": "174.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "167500", "valid_best_accuracy": "87.524"} [2024-07-11 13:12:52,371][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 167500 updates [2024-07-11 13:12:52,371][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_167500.pt [2024-07-11 13:12:55,638][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_167500.pt [2024-07-11 13:13:02,606][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_167500.pt (epoch 2 @ 167500 updates, score 87.524) (writing took 10.235124910948798 seconds) [2024-07-11 13:16:04,408][train_inner][INFO] - {"epoch": 2, "update": 1.111, "loss": "0.898", "ntokens": "126.565", "acc_total": "126.565", "n_correct": "109.355", "wer_total": "126.565", "n_error": "17.2", "ppl": "1.86", "accuracy": "86.402", "wer": "13.59", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "167600", "lr": "0.000181644", "gnorm": "2.744", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "477108"} [2024-07-11 13:22:08,602][train_inner][INFO] - {"epoch": 2, "update": 1.113, "loss": "0.952", "ntokens": "127.04", "acc_total": "127.04", "n_correct": "109.175", "wer_total": "127.04", "n_error": "17.865", "ppl": "1.93", "accuracy": "85.938", "wer": "14.062", "wps": "69.8", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "167800", "lr": "0.000181101", "gnorm": "2.866", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "477473"} [2024-07-11 13:23:01,387][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-11 13:28:14,406][train_inner][INFO] - {"epoch": 2, "update": 1.114, "loss": "0.92", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "109.21", "wer_total": "126.555", "n_error": "17.34", "ppl": "1.89", "accuracy": "86.294", "wer": "13.702", "wps": "69.2", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "168000", "lr": "0.000180559", "gnorm": "2.74", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "477838"} [2024-07-11 13:34:18,248][train_inner][INFO] - {"epoch": 2, "update": 1.115, "loss": "0.927", "ntokens": "126.75", "acc_total": "126.75", "n_correct": "108.92", "wer_total": "126.75", "n_error": "17.825", "ppl": "1.9", "accuracy": "85.933", "wer": "14.063", "wps": "69.7", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "168200", "lr": "0.000180019", "gnorm": "2.69", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "478202"} [2024-07-11 13:40:22,508][train_inner][INFO] - {"epoch": 2, "update": 1.117, "loss": "0.934", "ntokens": "126.72", "acc_total": "126.72", "n_correct": "109.095", "wer_total": "126.72", "n_error": "17.62", "ppl": "1.91", "accuracy": "86.091", "wer": "13.905", "wps": "69.6", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "168400", "lr": "0.000179481", "gnorm": "2.851", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "478567"} [2024-07-11 13:46:26,869][train_inner][INFO] - {"epoch": 2, "update": 1.118, "loss": "0.907", "ntokens": "127.815", "acc_total": "127.815", "n_correct": "110.49", "wer_total": "127.815", "n_error": "17.315", "ppl": "1.88", "accuracy": "86.445", "wer": "13.547", "wps": "70.2", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "168600", "lr": "0.000178944", "gnorm": "2.684", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "478931"} [2024-07-11 13:52:31,229][train_inner][INFO] - {"epoch": 2, "update": 1.119, "loss": "0.893", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "110.535", "wer_total": "127.595", "n_error": "17.055", "ppl": "1.86", "accuracy": "86.63", "wer": "13.367", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "168800", "lr": "0.000178409", "gnorm": "2.696", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "479295"} [2024-07-11 13:58:35,511][train_inner][INFO] - {"epoch": 2, "update": 1.121, "loss": "0.885", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "109.51", "wer_total": "126.725", "n_error": "17.215", "ppl": "1.85", "accuracy": "86.415", "wer": "13.585", "wps": "69.6", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "169000", "lr": "0.000177875", "gnorm": "2.826", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "479660"} [2024-07-11 14:04:39,905][train_inner][INFO] - {"epoch": 2, "update": 1.122, "loss": "0.897", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "109.575", "wer_total": "126.53", "n_error": "16.955", "ppl": "1.86", "accuracy": "86.6", "wer": "13.4", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "169200", "lr": "0.000177343", "gnorm": "2.791", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "480024"} [2024-07-11 14:10:44,315][train_inner][INFO] - {"epoch": 2, "update": 1.123, "loss": "0.914", "ntokens": "126.695", "acc_total": "126.695", "n_correct": "109.15", "wer_total": "126.695", "n_error": "17.545", "ppl": "1.88", "accuracy": "86.152", "wer": "13.848", "wps": "69.5", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "169400", "lr": "0.000176812", "gnorm": "2.931", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "480388"} [2024-07-11 14:16:48,807][train_inner][INFO] - {"epoch": 2, "update": 1.125, "loss": "0.875", "ntokens": "126.165", "acc_total": "126.165", "n_correct": "109.625", "wer_total": "126.165", "n_error": "16.54", "ppl": "1.83", "accuracy": "86.89", "wer": "13.11", "wps": "69.2", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "169600", "lr": "0.000176283", "gnorm": "2.843", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "480753"} [2024-07-11 14:22:53,489][train_inner][INFO] - {"epoch": 2, "update": 1.126, "loss": "0.958", "ntokens": "127.89", "acc_total": "127.89", "n_correct": "109.585", "wer_total": "127.89", "n_error": "18.29", "ppl": "1.94", "accuracy": "85.687", "wer": "14.301", "wps": "70.1", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "169800", "lr": "0.000175756", "gnorm": "3.046", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "481118"} [2024-07-11 14:28:58,372][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "0.897", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "109.69", "wer_total": "126.9", "n_error": "17.21", "ppl": "1.86", "accuracy": "86.438", "wer": "13.562", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "170000", "lr": "0.00017523", "gnorm": "2.756", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "481482"} [2024-07-11 14:28:58,373][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 15:10:50,231][valid][INFO] - {"epoch": 2, "valid_loss": "0.793", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.8792", "valid_wer_total": "18.1585", "valid_n_error": "2.27874", "valid_ppl": "1.73", "valid_accuracy": "87.448", "valid_wer": "12.549", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "170000", "valid_best_accuracy": "87.524"} [2024-07-11 15:10:50,232][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 170000 updates [2024-07-11 15:10:50,232][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_170000.pt [2024-07-11 15:10:53,449][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_170000.pt [2024-07-11 15:10:55,568][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_170000.pt (epoch 2 @ 170000 updates, score 87.448) (writing took 5.335682459990494 seconds) [2024-07-11 15:17:00,769][train_inner][INFO] - {"epoch": 2, "update": 1.129, "loss": "0.96", "ntokens": "127.725", "acc_total": "127.725", "n_correct": "108.915", "wer_total": "127.725", "n_error": "18.805", "ppl": "1.95", "accuracy": "85.273", "wer": "14.723", "wps": "8.9", "ups": "0.07", "wpb": "127.7", "bsz": "8", "num_updates": "170200", "lr": "0.000174706", "gnorm": "2.911", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "484365"} [2024-07-11 15:20:58,260][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-11 15:23:07,923][train_inner][INFO] - {"epoch": 2, "update": 1.13, "loss": "0.896", "ntokens": "127.3", "acc_total": "127.3", "n_correct": "109.825", "wer_total": "127.3", "n_error": "17.465", "ppl": "1.86", "accuracy": "86.273", "wer": "13.72", "wps": "69.3", "ups": "0.54", "wpb": "127.3", "bsz": "8", "num_updates": "170400", "lr": "0.000174184", "gnorm": "2.826", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "484732"} [2024-07-11 15:29:13,186][train_inner][INFO] - {"epoch": 2, "update": 1.131, "loss": "0.903", "ntokens": "126.12", "acc_total": "126.12", "n_correct": "108.975", "wer_total": "126.12", "n_error": "17.145", "ppl": "1.87", "accuracy": "86.406", "wer": "13.594", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "170600", "lr": "0.000173663", "gnorm": "2.84", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "485097"} [2024-07-11 15:35:18,465][train_inner][INFO] - {"epoch": 2, "update": 1.133, "loss": "0.931", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "109.195", "wer_total": "126.625", "n_error": "17.425", "ppl": "1.91", "accuracy": "86.235", "wer": "13.761", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "170800", "lr": "0.000173143", "gnorm": "2.779", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "485463"} [2024-07-11 15:41:23,777][train_inner][INFO] - {"epoch": 2, "update": 1.134, "loss": "0.916", "ntokens": "126.745", "acc_total": "126.745", "n_correct": "109.34", "wer_total": "126.745", "n_error": "17.4", "ppl": "1.89", "accuracy": "86.268", "wer": "13.728", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "171000", "lr": "0.000172625", "gnorm": "2.822", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "485828"} [2024-07-11 15:47:29,641][train_inner][INFO] - {"epoch": 2, "update": 1.135, "loss": "0.934", "ntokens": "126.48", "acc_total": "126.48", "n_correct": "108.745", "wer_total": "126.48", "n_error": "17.725", "ppl": "1.91", "accuracy": "85.978", "wer": "14.014", "wps": "69.1", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "171200", "lr": "0.000172109", "gnorm": "3.008", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "486194"} [2024-07-11 15:53:35,051][train_inner][INFO] - {"epoch": 2, "update": 1.137, "loss": "0.909", "ntokens": "126.085", "acc_total": "126.085", "n_correct": "108.815", "wer_total": "126.085", "n_error": "17.265", "ppl": "1.88", "accuracy": "86.303", "wer": "13.693", "wps": "69", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "171400", "lr": "0.000171594", "gnorm": "3.001", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "486559"} [2024-07-11 15:59:40,472][train_inner][INFO] - {"epoch": 2, "update": 1.138, "loss": "0.93", "ntokens": "126.36", "acc_total": "126.36", "n_correct": "109.015", "wer_total": "126.36", "n_error": "17.345", "ppl": "1.9", "accuracy": "86.273", "wer": "13.727", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "171600", "lr": "0.000171081", "gnorm": "2.893", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "486925"} [2024-07-11 16:05:46,066][train_inner][INFO] - {"epoch": 2, "update": 1.139, "loss": "0.918", "ntokens": "127.325", "acc_total": "127.325", "n_correct": "109.47", "wer_total": "127.325", "n_error": "17.855", "ppl": "1.89", "accuracy": "85.977", "wer": "14.023", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "171800", "lr": "0.000170569", "gnorm": "2.887", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "487290"} [2024-07-11 16:11:51,662][train_inner][INFO] - {"epoch": 2, "update": 1.141, "loss": "0.918", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "109.49", "wer_total": "127.075", "n_error": "17.585", "ppl": "1.89", "accuracy": "86.162", "wer": "13.838", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "172000", "lr": "0.000170059", "gnorm": "2.961", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "487656"} [2024-07-11 16:17:57,401][train_inner][INFO] - {"epoch": 2, "update": 1.142, "loss": "0.929", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "109.6", "wer_total": "127.1", "n_error": "17.49", "ppl": "1.9", "accuracy": "86.231", "wer": "13.761", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "172200", "lr": "0.00016955", "gnorm": "2.887", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "488021"} [2024-07-11 16:24:03,136][train_inner][INFO] - {"epoch": 2, "update": 1.143, "loss": "0.898", "ntokens": "126.575", "acc_total": "126.575", "n_correct": "109.71", "wer_total": "126.575", "n_error": "16.86", "ppl": "1.86", "accuracy": "86.676", "wer": "13.32", "wps": "69.2", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "172400", "lr": "0.000169043", "gnorm": "2.885", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "488387"} [2024-07-11 16:27:05,875][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 17:08:59,608][valid][INFO] - {"epoch": 2, "valid_loss": "0.793", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.8847", "valid_wer_total": "18.1585", "valid_n_error": "2.2732", "valid_ppl": "1.73", "valid_accuracy": "87.478", "valid_wer": "12.519", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "172500", "valid_best_accuracy": "87.524"} [2024-07-11 17:08:59,609][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 172500 updates [2024-07-11 17:08:59,609][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_172500.pt [2024-07-11 17:09:02,792][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_172500.pt [2024-07-11 17:09:05,133][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_172500.pt (epoch 2 @ 172500 updates, score 87.478) (writing took 5.523956644930877 seconds) [2024-07-11 17:12:07,639][train_inner][INFO] - {"epoch": 2, "update": 1.145, "loss": "0.919", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "109.045", "wer_total": "126.555", "n_error": "17.505", "ppl": "1.89", "accuracy": "86.164", "wer": "13.832", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "172600", "lr": "0.000168537", "gnorm": "2.851", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "491272"} [2024-07-11 17:18:12,790][train_inner][INFO] - {"epoch": 2, "update": 1.146, "loss": "0.918", "ntokens": "126.455", "acc_total": "126.455", "n_correct": "109.37", "wer_total": "126.455", "n_error": "17.065", "ppl": "1.89", "accuracy": "86.489", "wer": "13.495", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "172800", "lr": "0.000168033", "gnorm": "2.944", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "491637"} [2024-07-11 17:24:18,423][train_inner][INFO] - {"epoch": 2, "update": 1.147, "loss": "0.906", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "109.955", "wer_total": "127.135", "n_error": "17.18", "ppl": "1.87", "accuracy": "86.487", "wer": "13.513", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "173000", "lr": "0.000167531", "gnorm": "2.635", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "492002"} [2024-07-11 17:30:23,803][train_inner][INFO] - {"epoch": 2, "update": 1.149, "loss": "0.871", "ntokens": "126.005", "acc_total": "126.005", "n_correct": "109.23", "wer_total": "126.005", "n_error": "16.77", "ppl": "1.83", "accuracy": "86.687", "wer": "13.309", "wps": "69", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "173200", "lr": "0.000167029", "gnorm": "2.88", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "492368"} [2024-07-11 17:36:29,270][train_inner][INFO] - {"epoch": 2, "update": 1.15, "loss": "0.905", "ntokens": "125.775", "acc_total": "125.775", "n_correct": "108.61", "wer_total": "125.775", "n_error": "17.155", "ppl": "1.87", "accuracy": "86.353", "wer": "13.639", "wps": "68.8", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "173400", "lr": "0.00016653", "gnorm": "2.939", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "492733"} [2024-07-11 17:42:35,060][train_inner][INFO] - {"epoch": 2, "update": 1.151, "loss": "0.876", "ntokens": "126.89", "acc_total": "126.89", "n_correct": "110.54", "wer_total": "126.89", "n_error": "16.35", "ppl": "1.83", "accuracy": "87.115", "wer": "12.885", "wps": "69.4", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "173600", "lr": "0.000166032", "gnorm": "2.796", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "493099"} [2024-07-11 17:48:40,784][train_inner][INFO] - {"epoch": 2, "update": 1.153, "loss": "0.942", "ntokens": "126.69", "acc_total": "126.69", "n_correct": "108.885", "wer_total": "126.69", "n_error": "17.8", "ppl": "1.92", "accuracy": "85.946", "wer": "14.05", "wps": "69.3", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "173800", "lr": "0.000165535", "gnorm": "3.052", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "493465"} [2024-07-11 17:54:46,519][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "0.909", "ntokens": "127.04", "acc_total": "127.04", "n_correct": "110.16", "wer_total": "127.04", "n_error": "16.87", "ppl": "1.88", "accuracy": "86.713", "wer": "13.279", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "174000", "lr": "0.00016504", "gnorm": "2.853", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "493831"} [2024-07-11 18:00:51,897][train_inner][INFO] - {"epoch": 2, "update": 1.155, "loss": "0.881", "ntokens": "125.645", "acc_total": "125.645", "n_correct": "109.07", "wer_total": "125.645", "n_error": "16.57", "ppl": "1.84", "accuracy": "86.808", "wer": "13.188", "wps": "68.8", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "174200", "lr": "0.000164546", "gnorm": "2.665", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "494196"} [2024-07-11 18:06:57,656][train_inner][INFO] - {"epoch": 2, "update": 1.157, "loss": "0.882", "ntokens": "127.88", "acc_total": "127.88", "n_correct": "110.765", "wer_total": "127.88", "n_error": "17.115", "ppl": "1.84", "accuracy": "86.616", "wer": "13.384", "wps": "69.9", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "174400", "lr": "0.000164054", "gnorm": "2.722", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "494562"} [2024-07-11 18:11:06,053][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-11 18:13:04,831][train_inner][INFO] - {"epoch": 2, "update": 1.158, "loss": "0.879", "ntokens": "126.655", "acc_total": "126.655", "n_correct": "109.825", "wer_total": "126.655", "n_error": "16.825", "ppl": "1.84", "accuracy": "86.712", "wer": "13.284", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "174600", "lr": "0.000163563", "gnorm": "2.733", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "494929"} [2024-07-11 18:19:10,362][train_inner][INFO] - {"epoch": 2, "update": 1.159, "loss": "0.875", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "110.25", "wer_total": "126.76", "n_error": "16.505", "ppl": "1.83", "accuracy": "86.975", "wer": "13.021", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "174800", "lr": "0.000163074", "gnorm": "2.718", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "495294"} [2024-07-11 18:22:27,611][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-11 18:25:17,469][train_inner][INFO] - {"epoch": 2, "update": 1.161, "loss": "0.918", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "110.225", "wer_total": "127.44", "n_error": "17.21", "ppl": "1.89", "accuracy": "86.492", "wer": "13.504", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "175000", "lr": "0.000162586", "gnorm": "2.679", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "495662"} [2024-07-11 18:25:17,470][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 19:07:10,230][valid][INFO] - {"epoch": 2, "valid_loss": "0.782", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.9523", "valid_wer_total": "18.1585", "valid_n_error": "2.20584", "valid_ppl": "1.72", "valid_accuracy": "87.85", "valid_wer": "12.148", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "175000", "valid_best_accuracy": "87.85"} [2024-07-11 19:07:10,231][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 175000 updates [2024-07-11 19:07:10,231][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_175000.pt [2024-07-11 19:07:13,388][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_175000.pt [2024-07-11 19:07:17,692][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_175000.pt (epoch 2 @ 175000 updates, score 87.85) (writing took 7.461607903009281 seconds) [2024-07-11 19:13:22,902][train_inner][INFO] - {"epoch": 2, "update": 1.162, "loss": "0.884", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "110.12", "wer_total": "126.775", "n_error": "16.645", "ppl": "1.85", "accuracy": "86.863", "wer": "13.13", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "175200", "lr": "0.0001621", "gnorm": "2.878", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "498547"} [2024-07-11 19:19:28,075][train_inner][INFO] - {"epoch": 2, "update": 1.163, "loss": "0.922", "ntokens": "126.215", "acc_total": "126.215", "n_correct": "109.015", "wer_total": "126.215", "n_error": "17.19", "ppl": "1.89", "accuracy": "86.372", "wer": "13.62", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "175400", "lr": "0.000161615", "gnorm": "2.847", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "498912"} [2024-07-11 19:25:33,091][train_inner][INFO] - {"epoch": 2, "update": 1.164, "loss": "0.873", "ntokens": "126.505", "acc_total": "126.505", "n_correct": "109.815", "wer_total": "126.505", "n_error": "16.685", "ppl": "1.83", "accuracy": "86.807", "wer": "13.189", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "175600", "lr": "0.000161132", "gnorm": "2.706", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "499277"} [2024-07-11 19:31:38,311][train_inner][INFO] - {"epoch": 2, "update": 1.166, "loss": "0.881", "ntokens": "127.935", "acc_total": "127.935", "n_correct": "111.015", "wer_total": "127.935", "n_error": "16.915", "ppl": "1.84", "accuracy": "86.775", "wer": "13.222", "wps": "70.1", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "175800", "lr": "0.00016065", "gnorm": "2.844", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "499642"} [2024-07-11 19:37:43,386][train_inner][INFO] - {"epoch": 2, "update": 1.167, "loss": "0.915", "ntokens": "126.145", "acc_total": "126.145", "n_correct": "109.03", "wer_total": "126.145", "n_error": "17.105", "ppl": "1.89", "accuracy": "86.432", "wer": "13.56", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "176000", "lr": "0.000160169", "gnorm": "2.789", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "500007"} [2024-07-11 19:43:48,296][train_inner][INFO] - {"epoch": 2, "update": 1.168, "loss": "0.907", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "109.985", "wer_total": "127.06", "n_error": "17.07", "ppl": "1.88", "accuracy": "86.561", "wer": "13.435", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "176200", "lr": "0.00015969", "gnorm": "2.679", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "500372"} [2024-07-11 19:49:53,196][train_inner][INFO] - {"epoch": 2, "update": 1.17, "loss": "0.907", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "110.015", "wer_total": "127.03", "n_error": "17", "ppl": "1.88", "accuracy": "86.606", "wer": "13.383", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "176400", "lr": "0.000159212", "gnorm": "2.71", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "500737"} [2024-07-11 19:55:58,352][train_inner][INFO] - {"epoch": 2, "update": 1.171, "loss": "0.898", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "109.55", "wer_total": "126.405", "n_error": "16.85", "ppl": "1.86", "accuracy": "86.666", "wer": "13.33", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "176600", "lr": "0.000158736", "gnorm": "2.796", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "501102"} [2024-07-11 20:02:03,395][train_inner][INFO] - {"epoch": 2, "update": 1.172, "loss": "0.885", "ntokens": "127.315", "acc_total": "127.315", "n_correct": "110.535", "wer_total": "127.315", "n_error": "16.78", "ppl": "1.85", "accuracy": "86.82", "wer": "13.18", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "176800", "lr": "0.000158261", "gnorm": "2.598", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "501467"} [2024-07-11 20:08:08,642][train_inner][INFO] - {"epoch": 2, "update": 1.174, "loss": "0.886", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "110.66", "wer_total": "127.09", "n_error": "16.43", "ppl": "1.85", "accuracy": "87.072", "wer": "12.928", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "177000", "lr": "0.000157788", "gnorm": "2.813", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "501833"} [2024-07-11 20:14:13,526][train_inner][INFO] - {"epoch": 2, "update": 1.175, "loss": "0.908", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "109.3", "wer_total": "126.265", "n_error": "16.965", "ppl": "1.88", "accuracy": "86.564", "wer": "13.436", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "177200", "lr": "0.000157316", "gnorm": "2.793", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "502198"} [2024-07-11 20:20:18,695][train_inner][INFO] - {"epoch": 2, "update": 1.176, "loss": "0.843", "ntokens": "127.905", "acc_total": "127.905", "n_correct": "111.595", "wer_total": "127.905", "n_error": "16.31", "ppl": "1.79", "accuracy": "87.248", "wer": "12.752", "wps": "70.1", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "177400", "lr": "0.000156845", "gnorm": "2.601", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "502563"} [2024-07-11 20:23:21,093][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 21:05:13,434][valid][INFO] - {"epoch": 2, "valid_loss": "0.763", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.9722", "valid_wer_total": "18.1585", "valid_n_error": "2.18566", "valid_ppl": "1.7", "valid_accuracy": "87.96", "valid_wer": "12.037", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "177500", "valid_best_accuracy": "87.96"} [2024-07-11 21:05:13,435][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 177500 updates [2024-07-11 21:05:13,435][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_177500.pt [2024-07-11 21:05:16,609][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_177500.pt [2024-07-11 21:05:20,975][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_177500.pt (epoch 2 @ 177500 updates, score 87.96) (writing took 7.540702083031647 seconds) [2024-07-11 21:08:23,296][train_inner][INFO] - {"epoch": 2, "update": 1.178, "loss": "0.903", "ntokens": "126.64", "acc_total": "126.64", "n_correct": "109.815", "wer_total": "126.64", "n_error": "16.825", "ppl": "1.87", "accuracy": "86.714", "wer": "13.286", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "177600", "lr": "0.000156376", "gnorm": "2.783", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "505447"} [2024-07-11 21:14:28,341][train_inner][INFO] - {"epoch": 2, "update": 1.179, "loss": "0.897", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "109.655", "wer_total": "126.53", "n_error": "16.865", "ppl": "1.86", "accuracy": "86.663", "wer": "13.329", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "177800", "lr": "0.000155908", "gnorm": "2.764", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "505812"} [2024-07-11 21:20:33,343][train_inner][INFO] - {"epoch": 2, "update": 1.18, "loss": "0.865", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "110.345", "wer_total": "126.915", "n_error": "16.565", "ppl": "1.82", "accuracy": "86.944", "wer": "13.052", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "178000", "lr": "0.000155442", "gnorm": "2.852", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "506177"} [2024-07-11 21:26:38,301][train_inner][INFO] - {"epoch": 2, "update": 1.182, "loss": "0.897", "ntokens": "126.595", "acc_total": "126.595", "n_correct": "109.67", "wer_total": "126.595", "n_error": "16.925", "ppl": "1.86", "accuracy": "86.631", "wer": "13.369", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "178200", "lr": "0.000154977", "gnorm": "2.755", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "506542"} [2024-07-11 21:32:43,270][train_inner][INFO] - {"epoch": 2, "update": 1.183, "loss": "0.903", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "110.335", "wer_total": "127.335", "n_error": "17", "ppl": "1.87", "accuracy": "86.649", "wer": "13.351", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "178400", "lr": "0.000154513", "gnorm": "2.917", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "506907"} [2024-07-11 21:38:48,234][train_inner][INFO] - {"epoch": 2, "update": 1.184, "loss": "0.878", "ntokens": "126.095", "acc_total": "126.095", "n_correct": "109.46", "wer_total": "126.095", "n_error": "16.63", "ppl": "1.84", "accuracy": "86.808", "wer": "13.188", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "178600", "lr": "0.000154051", "gnorm": "2.837", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "507272"} [2024-07-11 21:44:53,134][train_inner][INFO] - {"epoch": 2, "update": 1.186, "loss": "0.91", "ntokens": "127.755", "acc_total": "127.755", "n_correct": "110.635", "wer_total": "127.755", "n_error": "17.12", "ppl": "1.88", "accuracy": "86.599", "wer": "13.401", "wps": "70", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "178800", "lr": "0.00015359", "gnorm": "2.974", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "507637"} [2024-07-11 21:50:57,972][train_inner][INFO] - {"epoch": 2, "update": 1.187, "loss": "0.861", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "110.61", "wer_total": "126.835", "n_error": "16.225", "ppl": "1.82", "accuracy": "87.208", "wer": "12.792", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "179000", "lr": "0.000153131", "gnorm": "2.729", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "508002"} [2024-07-11 21:51:14,291][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-11 21:57:04,655][train_inner][INFO] - {"epoch": 2, "update": 1.188, "loss": "0.877", "ntokens": "126.88", "acc_total": "126.88", "n_correct": "110.04", "wer_total": "126.88", "n_error": "16.835", "ppl": "1.84", "accuracy": "86.728", "wer": "13.268", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "179200", "lr": "0.000152673", "gnorm": "2.611", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "508369"} [2024-07-11 22:03:09,744][train_inner][INFO] - {"epoch": 2, "update": 1.19, "loss": "0.851", "ntokens": "127.9", "acc_total": "127.9", "n_correct": "111.065", "wer_total": "127.9", "n_error": "16.835", "ppl": "1.8", "accuracy": "86.837", "wer": "13.163", "wps": "70.1", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "179400", "lr": "0.000152216", "gnorm": "2.756", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "508734"} [2024-07-11 22:09:14,670][train_inner][INFO] - {"epoch": 2, "update": 1.191, "loss": "0.893", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "110.56", "wer_total": "127.42", "n_error": "16.855", "ppl": "1.86", "accuracy": "86.768", "wer": "13.228", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "179600", "lr": "0.000151761", "gnorm": "2.71", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "509099"} [2024-07-11 22:15:19,605][train_inner][INFO] - {"epoch": 2, "update": 1.192, "loss": "0.834", "ntokens": "127.42", "acc_total": "127.42", "n_correct": "111.025", "wer_total": "127.42", "n_error": "16.39", "ppl": "1.78", "accuracy": "87.133", "wer": "12.863", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "179800", "lr": "0.000151307", "gnorm": "2.607", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "509464"} [2024-07-11 22:21:24,448][train_inner][INFO] - {"epoch": 2, "update": 1.194, "loss": "0.875", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "110.17", "wer_total": "126.9", "n_error": "16.72", "ppl": "1.83", "accuracy": "86.816", "wer": "13.176", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "180000", "lr": "0.000150854", "gnorm": "2.744", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "509829"} [2024-07-11 22:21:24,449][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-11 23:03:14,652][valid][INFO] - {"epoch": 2, "valid_loss": "0.763", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.9762", "valid_wer_total": "18.1585", "valid_n_error": "2.1817", "valid_ppl": "1.7", "valid_accuracy": "87.982", "valid_wer": "12.015", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "180000", "valid_best_accuracy": "87.982"} [2024-07-11 23:03:14,652][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 180000 updates [2024-07-11 23:03:14,653][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_180000.pt [2024-07-11 23:03:17,882][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_180000.pt [2024-07-11 23:03:22,286][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_180000.pt (epoch 2 @ 180000 updates, score 87.982) (writing took 7.633465591003187 seconds) [2024-07-11 23:09:27,042][train_inner][INFO] - {"epoch": 2, "update": 1.195, "loss": "0.897", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "109.38", "wer_total": "126.305", "n_error": "16.925", "ppl": "1.86", "accuracy": "86.6", "wer": "13.4", "wps": "8.8", "ups": "0.07", "wpb": "126.3", "bsz": "8", "num_updates": "180200", "lr": "0.000150403", "gnorm": "2.822", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "512711"} [2024-07-11 23:15:32,056][train_inner][INFO] - {"epoch": 2, "update": 1.196, "loss": "0.87", "ntokens": "126.455", "acc_total": "126.455", "n_correct": "109.945", "wer_total": "126.455", "n_error": "16.505", "ppl": "1.83", "accuracy": "86.944", "wer": "13.052", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "180400", "lr": "0.000149953", "gnorm": "2.767", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "513076"} [2024-07-11 23:21:37,186][train_inner][INFO] - {"epoch": 2, "update": 1.198, "loss": "0.902", "ntokens": "126.06", "acc_total": "126.06", "n_correct": "109.005", "wer_total": "126.06", "n_error": "17.045", "ppl": "1.87", "accuracy": "86.471", "wer": "13.521", "wps": "69", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "180600", "lr": "0.000149505", "gnorm": "2.683", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "513441"} [2024-07-11 23:27:42,451][train_inner][INFO] - {"epoch": 2, "update": 1.199, "loss": "0.836", "ntokens": "127.28", "acc_total": "127.28", "n_correct": "111.32", "wer_total": "127.28", "n_error": "15.955", "ppl": "1.79", "accuracy": "87.461", "wer": "12.535", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "180800", "lr": "0.000149058", "gnorm": "2.826", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "513807"} [2024-07-11 23:33:47,505][train_inner][INFO] - {"epoch": 2, "update": 1.2, "loss": "0.881", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "109.86", "wer_total": "126.9", "n_error": "17.03", "ppl": "1.84", "accuracy": "86.572", "wer": "13.42", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "181000", "lr": "0.000148612", "gnorm": "2.805", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "514172"} [2024-07-11 23:38:08,476][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-11 23:39:54,533][train_inner][INFO] - {"epoch": 2, "update": 1.202, "loss": "0.907", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "109.92", "wer_total": "127.185", "n_error": "17.26", "ppl": "1.87", "accuracy": "86.425", "wer": "13.571", "wps": "69.3", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "181200", "lr": "0.000148167", "gnorm": "3.224", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "514539"} [2024-07-11 23:45:59,772][train_inner][INFO] - {"epoch": 2, "update": 1.203, "loss": "0.869", "ntokens": "126.185", "acc_total": "126.185", "n_correct": "109.695", "wer_total": "126.185", "n_error": "16.475", "ppl": "1.83", "accuracy": "86.932", "wer": "13.056", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "181400", "lr": "0.000147724", "gnorm": "2.84", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "514904"} [2024-07-11 23:52:05,022][train_inner][INFO] - {"epoch": 2, "update": 1.204, "loss": "0.903", "ntokens": "127.88", "acc_total": "127.88", "n_correct": "110.585", "wer_total": "127.88", "n_error": "17.295", "ppl": "1.87", "accuracy": "86.476", "wer": "13.524", "wps": "70", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "181600", "lr": "0.000147282", "gnorm": "2.637", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "515269"} [2024-07-11 23:58:10,337][train_inner][INFO] - {"epoch": 2, "update": 1.206, "loss": "0.876", "ntokens": "127.99", "acc_total": "127.99", "n_correct": "111.275", "wer_total": "127.99", "n_error": "16.705", "ppl": "1.84", "accuracy": "86.94", "wer": "13.052", "wps": "70.1", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "181800", "lr": "0.000146841", "gnorm": "2.567", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "515634"} [2024-07-12 00:04:15,791][train_inner][INFO] - {"epoch": 2, "update": 1.207, "loss": "0.901", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "109.675", "wer_total": "127.055", "n_error": "17.375", "ppl": "1.87", "accuracy": "86.321", "wer": "13.675", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "182000", "lr": "0.000146402", "gnorm": "2.623", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "516000"} [2024-07-12 00:10:21,219][train_inner][INFO] - {"epoch": 2, "update": 1.208, "loss": "0.851", "ntokens": "126.655", "acc_total": "126.655", "n_correct": "110.1", "wer_total": "126.655", "n_error": "16.55", "ppl": "1.8", "accuracy": "86.929", "wer": "13.067", "wps": "69.3", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "182200", "lr": "0.000145964", "gnorm": "2.769", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "516365"} [2024-07-12 00:16:26,712][train_inner][INFO] - {"epoch": 2, "update": 1.21, "loss": "0.871", "ntokens": "128.24", "acc_total": "128.24", "n_correct": "111.69", "wer_total": "128.24", "n_error": "16.54", "ppl": "1.83", "accuracy": "87.095", "wer": "12.898", "wps": "70.2", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "182400", "lr": "0.000145528", "gnorm": "2.566", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "516731"} [2024-07-12 00:19:29,287][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 01:01:20,168][valid][INFO] - {"epoch": 2, "valid_loss": "0.764", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.9755", "valid_wer_total": "18.1585", "valid_n_error": "2.1822", "valid_ppl": "1.7", "valid_accuracy": "87.978", "valid_wer": "12.018", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "182500", "valid_best_accuracy": "87.982"} [2024-07-12 01:01:20,169][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 182500 updates [2024-07-12 01:01:20,169][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_182500.pt [2024-07-12 01:01:23,358][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_182500.pt [2024-07-12 01:01:25,484][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_182500.pt (epoch 2 @ 182500 updates, score 87.978) (writing took 5.314979742048308 seconds) [2024-07-12 01:04:27,946][train_inner][INFO] - {"epoch": 2, "update": 1.211, "loss": "0.873", "ntokens": "126.39", "acc_total": "126.39", "n_correct": "109.88", "wer_total": "126.39", "n_error": "16.5", "ppl": "1.83", "accuracy": "86.937", "wer": "13.055", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "182600", "lr": "0.000145092", "gnorm": "2.686", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "519612"} [2024-07-12 01:10:33,297][train_inner][INFO] - {"epoch": 2, "update": 1.212, "loss": "0.852", "ntokens": "127.04", "acc_total": "127.04", "n_correct": "110.735", "wer_total": "127.04", "n_error": "16.305", "ppl": "1.8", "accuracy": "87.165", "wer": "12.835", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "182800", "lr": "0.000144658", "gnorm": "2.642", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "519977"} [2024-07-12 01:16:38,892][train_inner][INFO] - {"epoch": 2, "update": 1.214, "loss": "0.851", "ntokens": "128.165", "acc_total": "128.165", "n_correct": "111.65", "wer_total": "128.165", "n_error": "16.505", "ppl": "1.8", "accuracy": "87.114", "wer": "12.878", "wps": "70.1", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "183000", "lr": "0.000144226", "gnorm": "2.96", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "520343"} [2024-07-12 01:16:42,467][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-12 01:20:54,423][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-12 01:22:47,724][train_inner][INFO] - {"epoch": 2, "update": 1.215, "loss": "0.918", "ntokens": "127.51", "acc_total": "127.51", "n_correct": "110.03", "wer_total": "127.51", "n_error": "17.475", "ppl": "1.89", "accuracy": "86.291", "wer": "13.705", "wps": "69.1", "ups": "0.54", "wpb": "127.5", "bsz": "8", "num_updates": "183200", "lr": "0.000143794", "gnorm": "2.918", "loss_scale": "256", "train_wall": "368", "gb_free": "6.5", "wall": "520712"} [2024-07-12 01:28:53,031][train_inner][INFO] - {"epoch": 2, "update": 1.216, "loss": "0.863", "ntokens": "126.595", "acc_total": "126.595", "n_correct": "109.77", "wer_total": "126.595", "n_error": "16.825", "ppl": "1.82", "accuracy": "86.71", "wer": "13.29", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "183400", "lr": "0.000143364", "gnorm": "2.685", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "521077"} [2024-07-12 01:34:58,371][train_inner][INFO] - {"epoch": 2, "update": 1.218, "loss": "0.884", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "109.58", "wer_total": "126.77", "n_error": "17.19", "ppl": "1.85", "accuracy": "86.44", "wer": "13.56", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "183600", "lr": "0.000142935", "gnorm": "2.665", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "521442"} [2024-07-12 01:41:03,887][train_inner][INFO] - {"epoch": 2, "update": 1.219, "loss": "0.873", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "110.065", "wer_total": "126.46", "n_error": "16.385", "ppl": "1.83", "accuracy": "87.035", "wer": "12.957", "wps": "69.2", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "183800", "lr": "0.000142508", "gnorm": "2.67", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "521808"} [2024-07-12 01:47:09,406][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "0.874", "ntokens": "127", "acc_total": "127", "n_correct": "110.425", "wer_total": "127", "n_error": "16.575", "ppl": "1.83", "accuracy": "86.949", "wer": "13.051", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "184000", "lr": "0.000142081", "gnorm": "2.883", "loss_scale": "256", "train_wall": "365", "gb_free": "6.5", "wall": "522173"} [2024-07-12 01:53:14,212][train_inner][INFO] - {"epoch": 2, "update": 1.222, "loss": "0.845", "ntokens": "126.275", "acc_total": "126.275", "n_correct": "110.41", "wer_total": "126.275", "n_error": "15.865", "ppl": "1.8", "accuracy": "87.436", "wer": "12.564", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "184200", "lr": "0.000141656", "gnorm": "2.618", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "522538"} [2024-07-12 01:59:19,154][train_inner][INFO] - {"epoch": 2, "update": 1.223, "loss": "0.841", "ntokens": "126.94", "acc_total": "126.94", "n_correct": "110.88", "wer_total": "126.94", "n_error": "16.06", "ppl": "1.79", "accuracy": "87.348", "wer": "12.652", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "184400", "lr": "0.000141233", "gnorm": "2.72", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "522903"} [2024-07-12 02:05:23,391][train_inner][INFO] - {"epoch": 2, "update": 1.224, "loss": "0.863", "ntokens": "127.15", "acc_total": "127.15", "n_correct": "110.665", "wer_total": "127.15", "n_error": "16.475", "ppl": "1.82", "accuracy": "87.035", "wer": "12.957", "wps": "69.8", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "184600", "lr": "0.00014081", "gnorm": "2.765", "loss_scale": "256", "train_wall": "363", "gb_free": "6.5", "wall": "523267"} [2024-07-12 02:11:27,686][train_inner][INFO] - {"epoch": 2, "update": 1.226, "loss": "0.88", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "110.565", "wer_total": "127.405", "n_error": "16.84", "ppl": "1.84", "accuracy": "86.782", "wer": "13.218", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "184800", "lr": "0.000140389", "gnorm": "2.562", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "523632"} [2024-07-12 02:17:31,749][train_inner][INFO] - {"epoch": 2, "update": 1.227, "loss": "0.9", "ntokens": "126.025", "acc_total": "126.025", "n_correct": "109", "wer_total": "126.025", "n_error": "17.02", "ppl": "1.87", "accuracy": "86.491", "wer": "13.505", "wps": "69.2", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "185000", "lr": "0.000139969", "gnorm": "2.606", "loss_scale": "256", "train_wall": "363", "gb_free": "6.5", "wall": "523996"} [2024-07-12 02:17:31,750][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 02:59:12,148][valid][INFO] - {"epoch": 2, "valid_loss": "0.761", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.9625", "valid_wer_total": "18.1585", "valid_n_error": "2.19562", "valid_ppl": "1.7", "valid_accuracy": "87.907", "valid_wer": "12.091", "valid_wps": "174.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "185000", "valid_best_accuracy": "87.982"} [2024-07-12 02:59:12,148][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 185000 updates [2024-07-12 02:59:12,149][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_185000.pt [2024-07-12 02:59:15,423][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_185000.pt [2024-07-12 02:59:17,835][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_185000.pt (epoch 2 @ 185000 updates, score 87.907) (writing took 5.686220492003486 seconds) [2024-07-12 03:05:20,976][train_inner][INFO] - {"epoch": 2, "update": 1.228, "loss": "0.85", "ntokens": "126.315", "acc_total": "126.315", "n_correct": "109.865", "wer_total": "126.315", "n_error": "16.45", "ppl": "1.8", "accuracy": "86.977", "wer": "13.023", "wps": "8.8", "ups": "0.07", "wpb": "126.3", "bsz": "8", "num_updates": "185200", "lr": "0.00013955", "gnorm": "2.517", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "526865"} [2024-07-12 03:11:24,302][train_inner][INFO] - {"epoch": 2, "update": 1.229, "loss": "0.878", "ntokens": "126.505", "acc_total": "126.505", "n_correct": "109.995", "wer_total": "126.505", "n_error": "16.505", "ppl": "1.84", "accuracy": "86.949", "wer": "13.047", "wps": "69.6", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "185400", "lr": "0.000139133", "gnorm": "2.698", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "527228"} [2024-07-12 03:17:27,561][train_inner][INFO] - {"epoch": 2, "update": 1.231, "loss": "0.882", "ntokens": "126.155", "acc_total": "126.155", "n_correct": "109.875", "wer_total": "126.155", "n_error": "16.265", "ppl": "1.84", "accuracy": "87.095", "wer": "12.893", "wps": "69.5", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "185600", "lr": "0.000138717", "gnorm": "2.591", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "527592"} [2024-07-12 03:23:31,173][train_inner][INFO] - {"epoch": 2, "update": 1.232, "loss": "0.87", "ntokens": "128.04", "acc_total": "128.04", "n_correct": "111.49", "wer_total": "128.04", "n_error": "16.55", "ppl": "1.83", "accuracy": "87.074", "wer": "12.926", "wps": "70.4", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "185800", "lr": "0.000138302", "gnorm": "2.524", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "527955"} [2024-07-12 03:29:34,728][train_inner][INFO] - {"epoch": 2, "update": 1.233, "loss": "0.927", "ntokens": "126.99", "acc_total": "126.99", "n_correct": "109.885", "wer_total": "126.99", "n_error": "17.1", "ppl": "1.9", "accuracy": "86.53", "wer": "13.466", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "186000", "lr": "0.000137888", "gnorm": "2.59", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "528319"} [2024-07-12 03:35:38,179][train_inner][INFO] - {"epoch": 2, "update": 1.235, "loss": "0.873", "ntokens": "125.69", "acc_total": "125.69", "n_correct": "109.225", "wer_total": "125.69", "n_error": "16.455", "ppl": "1.83", "accuracy": "86.9", "wer": "13.092", "wps": "69.2", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "186200", "lr": "0.000137476", "gnorm": "2.717", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "528682"} [2024-07-12 03:41:41,849][train_inner][INFO] - {"epoch": 2, "update": 1.236, "loss": "0.858", "ntokens": "127.25", "acc_total": "127.25", "n_correct": "110.655", "wer_total": "127.25", "n_error": "16.595", "ppl": "1.81", "accuracy": "86.959", "wer": "13.041", "wps": "70", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "186400", "lr": "0.000137065", "gnorm": "2.436", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "529046"} [2024-07-12 03:47:45,188][train_inner][INFO] - {"epoch": 2, "update": 1.237, "loss": "0.869", "ntokens": "127.875", "acc_total": "127.875", "n_correct": "111.535", "wer_total": "127.875", "n_error": "16.335", "ppl": "1.83", "accuracy": "87.222", "wer": "12.774", "wps": "70.4", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "186600", "lr": "0.000136655", "gnorm": "2.701", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "529409"} [2024-07-12 03:53:48,369][train_inner][INFO] - {"epoch": 2, "update": 1.239, "loss": "0.855", "ntokens": "127.37", "acc_total": "127.37", "n_correct": "110.925", "wer_total": "127.37", "n_error": "16.425", "ppl": "1.81", "accuracy": "87.089", "wer": "12.896", "wps": "70.1", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "186800", "lr": "0.000136246", "gnorm": "2.545", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "529772"} [2024-07-12 03:56:13,432][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-12 03:59:53,263][train_inner][INFO] - {"epoch": 2, "update": 1.24, "loss": "0.865", "ntokens": "125.885", "acc_total": "125.885", "n_correct": "109.485", "wer_total": "125.885", "n_error": "16.395", "ppl": "1.82", "accuracy": "86.972", "wer": "13.024", "wps": "69", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "187000", "lr": "0.000135838", "gnorm": "3.312", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "530137"} [2024-07-12 04:05:56,468][train_inner][INFO] - {"epoch": 2, "update": 1.241, "loss": "0.837", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "111.07", "wer_total": "127.095", "n_error": "16.02", "ppl": "1.79", "accuracy": "87.391", "wer": "12.605", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "187200", "lr": "0.000135432", "gnorm": "2.524", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "530501"} [2024-07-12 04:11:59,499][train_inner][INFO] - {"epoch": 2, "update": 1.243, "loss": "0.81", "ntokens": "125.88", "acc_total": "125.88", "n_correct": "110.135", "wer_total": "125.88", "n_error": "15.745", "ppl": "1.75", "accuracy": "87.492", "wer": "12.508", "wps": "69.3", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "187400", "lr": "0.000135027", "gnorm": "2.609", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "530864"} [2024-07-12 04:15:01,010][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 04:56:37,183][valid][INFO] - {"epoch": 2, "valid_loss": "0.745", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.011", "valid_wer_total": "18.1585", "valid_n_error": "2.14714", "valid_ppl": "1.68", "valid_accuracy": "88.174", "valid_wer": "11.824", "valid_wps": "174.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "187500", "valid_best_accuracy": "88.174"} [2024-07-12 04:56:37,184][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 187500 updates [2024-07-12 04:56:37,184][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_187500.pt [2024-07-12 04:56:40,405][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_187500.pt [2024-07-12 04:56:45,738][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_187500.pt (epoch 2 @ 187500 updates, score 88.174) (writing took 8.554666129057296 seconds) [2024-07-12 04:59:47,002][train_inner][INFO] - {"epoch": 2, "update": 1.244, "loss": "0.875", "ntokens": "127.12", "acc_total": "127.12", "n_correct": "110.735", "wer_total": "127.12", "n_error": "16.385", "ppl": "1.83", "accuracy": "87.111", "wer": "12.889", "wps": "8.9", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "187600", "lr": "0.000134623", "gnorm": "2.586", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "533731"} [2024-07-12 05:05:50,078][train_inner][INFO] - {"epoch": 2, "update": 1.245, "loss": "0.835", "ntokens": "126.385", "acc_total": "126.385", "n_correct": "110.505", "wer_total": "126.385", "n_error": "15.88", "ppl": "1.78", "accuracy": "87.435", "wer": "12.565", "wps": "69.6", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "187800", "lr": "0.00013422", "gnorm": "2.561", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "534094"} [2024-07-12 05:11:53,137][train_inner][INFO] - {"epoch": 2, "update": 1.247, "loss": "0.85", "ntokens": "127.28", "acc_total": "127.28", "n_correct": "110.66", "wer_total": "127.28", "n_error": "16.62", "ppl": "1.8", "accuracy": "86.942", "wer": "13.058", "wps": "70.1", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "188000", "lr": "0.000133819", "gnorm": "2.649", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "534457"} [2024-07-12 05:17:55,993][train_inner][INFO] - {"epoch": 2, "update": 1.248, "loss": "0.843", "ntokens": "126.51", "acc_total": "126.51", "n_correct": "110.335", "wer_total": "126.51", "n_error": "16.175", "ppl": "1.79", "accuracy": "87.214", "wer": "12.786", "wps": "69.7", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "188200", "lr": "0.000133418", "gnorm": "2.703", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "534820"} [2024-07-12 05:23:58,816][train_inner][INFO] - {"epoch": 2, "update": 1.249, "loss": "0.84", "ntokens": "126.07", "acc_total": "126.07", "n_correct": "110.16", "wer_total": "126.07", "n_error": "15.905", "ppl": "1.79", "accuracy": "87.38", "wer": "12.616", "wps": "69.5", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "188400", "lr": "0.000133019", "gnorm": "2.522", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "535183"} [2024-07-12 05:30:01,585][train_inner][INFO] - {"epoch": 2, "update": 1.251, "loss": "0.845", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "110.575", "wer_total": "126.92", "n_error": "16.345", "ppl": "1.8", "accuracy": "87.122", "wer": "12.878", "wps": "70", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "188600", "lr": "0.000132621", "gnorm": "2.596", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "535546"} [2024-07-12 05:36:04,517][train_inner][INFO] - {"epoch": 2, "update": 1.252, "loss": "0.864", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "109.795", "wer_total": "126.325", "n_error": "16.525", "ppl": "1.82", "accuracy": "86.915", "wer": "13.081", "wps": "69.6", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "188800", "lr": "0.000132225", "gnorm": "2.518", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "535909"} [2024-07-12 05:42:07,390][train_inner][INFO] - {"epoch": 2, "update": 1.253, "loss": "0.791", "ntokens": "127.065", "acc_total": "127.065", "n_correct": "111.61", "wer_total": "127.065", "n_error": "15.45", "ppl": "1.73", "accuracy": "87.837", "wer": "12.159", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "189000", "lr": "0.000131829", "gnorm": "2.424", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "536271"} [2024-07-12 05:42:19,993][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 256.0 [2024-07-12 05:48:12,004][train_inner][INFO] - {"epoch": 2, "update": 1.255, "loss": "0.842", "ntokens": "127.66", "acc_total": "127.66", "n_correct": "111.34", "wer_total": "127.66", "n_error": "16.315", "ppl": "1.79", "accuracy": "87.216", "wer": "12.78", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "189200", "lr": "0.000131435", "gnorm": "2.533", "loss_scale": "256", "train_wall": "364", "gb_free": "6.5", "wall": "536636"} [2024-07-12 05:54:14,889][train_inner][INFO] - {"epoch": 2, "update": 1.256, "loss": "0.854", "ntokens": "126.885", "acc_total": "126.885", "n_correct": "110.445", "wer_total": "126.885", "n_error": "16.435", "ppl": "1.81", "accuracy": "87.043", "wer": "12.953", "wps": "69.9", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "189400", "lr": "0.000131042", "gnorm": "2.647", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "536999"} [2024-07-12 06:00:18,056][train_inner][INFO] - {"epoch": 2, "update": 1.257, "loss": "0.847", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "110.415", "wer_total": "126.82", "n_error": "16.4", "ppl": "1.8", "accuracy": "87.064", "wer": "12.932", "wps": "69.8", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "189600", "lr": "0.00013065", "gnorm": "2.775", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "537362"} [2024-07-12 06:06:20,996][train_inner][INFO] - {"epoch": 2, "update": 1.259, "loss": "0.868", "ntokens": "126.345", "acc_total": "126.345", "n_correct": "110.215", "wer_total": "126.345", "n_error": "16.125", "ppl": "1.82", "accuracy": "87.233", "wer": "12.763", "wps": "69.6", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "189800", "lr": "0.000130259", "gnorm": "2.657", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "537725"} [2024-07-12 06:12:23,935][train_inner][INFO] - {"epoch": 2, "update": 1.26, "loss": "0.828", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "111.45", "wer_total": "127.41", "n_error": "15.95", "ppl": "1.78", "accuracy": "87.474", "wer": "12.519", "wps": "70.2", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "190000", "lr": "0.000129869", "gnorm": "2.555", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "538088"} [2024-07-12 06:12:23,936][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 06:53:58,568][valid][INFO] - {"epoch": 2, "valid_loss": "0.743", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.0291", "valid_wer_total": "18.1585", "valid_n_error": "2.12918", "valid_ppl": "1.67", "valid_accuracy": "88.273", "valid_wer": "11.726", "valid_wps": "174.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "190000", "valid_best_accuracy": "88.273"} [2024-07-12 06:53:58,568][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 190000 updates [2024-07-12 06:53:58,569][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_190000.pt [2024-07-12 06:54:01,761][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_190000.pt [2024-07-12 06:54:05,992][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_190000.pt (epoch 2 @ 190000 updates, score 88.273) (writing took 7.4239740279736 seconds) [2024-07-12 07:00:08,517][train_inner][INFO] - {"epoch": 2, "update": 1.261, "loss": "0.861", "ntokens": "126.035", "acc_total": "126.035", "n_correct": "109.855", "wer_total": "126.035", "n_error": "16.175", "ppl": "1.82", "accuracy": "87.162", "wer": "12.834", "wps": "8.8", "ups": "0.07", "wpb": "126", "bsz": "8", "num_updates": "190200", "lr": "0.000129481", "gnorm": "2.339", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "540953"} [2024-07-12 07:06:11,325][train_inner][INFO] - {"epoch": 2, "update": 1.263, "loss": "0.831", "ntokens": "126.775", "acc_total": "126.775", "n_correct": "110.855", "wer_total": "126.775", "n_error": "15.92", "ppl": "1.78", "accuracy": "87.442", "wer": "12.558", "wps": "69.9", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "190400", "lr": "0.000129094", "gnorm": "2.547", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "541315"} [2024-07-12 07:12:14,032][train_inner][INFO] - {"epoch": 2, "update": 1.264, "loss": "0.845", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "110.66", "wer_total": "126.59", "n_error": "15.925", "ppl": "1.8", "accuracy": "87.416", "wer": "12.58", "wps": "69.8", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "190600", "lr": "0.000128707", "gnorm": "2.472", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "541678"} [2024-07-12 07:18:16,763][train_inner][INFO] - {"epoch": 2, "update": 1.265, "loss": "0.836", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "111.425", "wer_total": "127.2", "n_error": "15.77", "ppl": "1.78", "accuracy": "87.598", "wer": "12.398", "wps": "70.1", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "190800", "lr": "0.000128322", "gnorm": "2.66", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "542041"} [2024-07-12 07:24:19,565][train_inner][INFO] - {"epoch": 2, "update": 1.267, "loss": "0.837", "ntokens": "126.43", "acc_total": "126.43", "n_correct": "110.465", "wer_total": "126.43", "n_error": "15.965", "ppl": "1.79", "accuracy": "87.372", "wer": "12.628", "wps": "69.7", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "191000", "lr": "0.000127939", "gnorm": "2.489", "loss_scale": "256", "train_wall": "362", "gb_free": "6.5", "wall": "542404"} [2024-07-12 07:30:22,784][train_inner][INFO] - {"epoch": 2, "update": 1.268, "loss": "0.826", "ntokens": "126.845", "acc_total": "126.845", "n_correct": "110.88", "wer_total": "126.845", "n_error": "15.965", "ppl": "1.77", "accuracy": "87.414", "wer": "12.586", "wps": "69.8", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "191200", "lr": "0.000127556", "gnorm": "2.293", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "542767"} [2024-07-12 07:36:26,730][train_inner][INFO] - {"epoch": 2, "update": 1.269, "loss": "0.842", "ntokens": "126.395", "acc_total": "126.395", "n_correct": "110.26", "wer_total": "126.395", "n_error": "16.135", "ppl": "1.79", "accuracy": "87.234", "wer": "12.766", "wps": "69.5", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "191400", "lr": "0.000127174", "gnorm": "2.639", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "543131"} [2024-07-12 07:42:30,821][train_inner][INFO] - {"epoch": 2, "update": 1.271, "loss": "0.861", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "110.11", "wer_total": "126.835", "n_error": "16.725", "ppl": "1.82", "accuracy": "86.814", "wer": "13.186", "wps": "69.7", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "191600", "lr": "0.000126794", "gnorm": "2.683", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "543495"} [2024-07-12 07:48:34,755][train_inner][INFO] - {"epoch": 2, "update": 1.272, "loss": "0.841", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "110.365", "wer_total": "126.295", "n_error": "15.925", "ppl": "1.79", "accuracy": "87.387", "wer": "12.609", "wps": "69.4", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "191800", "lr": "0.000126415", "gnorm": "2.627", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "543859"} [2024-07-12 07:54:38,721][train_inner][INFO] - {"epoch": 2, "update": 1.273, "loss": "0.838", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "110.75", "wer_total": "126.875", "n_error": "16.12", "ppl": "1.79", "accuracy": "87.291", "wer": "12.705", "wps": "69.7", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "192000", "lr": "0.000126036", "gnorm": "2.425", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "544223"} [2024-07-12 08:00:42,798][train_inner][INFO] - {"epoch": 2, "update": 1.275, "loss": "0.834", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "111.54", "wer_total": "127.32", "n_error": "15.765", "ppl": "1.78", "accuracy": "87.606", "wer": "12.382", "wps": "69.9", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "192200", "lr": "0.000125659", "gnorm": "2.553", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "544587"} [2024-07-12 08:06:46,837][train_inner][INFO] - {"epoch": 2, "update": 1.276, "loss": "0.834", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "110.73", "wer_total": "126.93", "n_error": "16.2", "ppl": "1.78", "accuracy": "87.237", "wer": "12.763", "wps": "69.7", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "192400", "lr": "0.000125284", "gnorm": "2.45", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "544951"} [2024-07-12 08:09:48,905][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 08:51:32,249][valid][INFO] - {"epoch": 2, "valid_loss": "0.746", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.0176", "valid_wer_total": "18.1585", "valid_n_error": "2.14014", "valid_ppl": "1.68", "valid_accuracy": "88.21", "valid_wer": "11.786", "valid_wps": "174", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "192500", "valid_best_accuracy": "88.273"} [2024-07-12 08:51:32,249][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 192500 updates [2024-07-12 08:51:32,249][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_192500.pt [2024-07-12 08:51:35,500][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_192500.pt [2024-07-12 08:51:37,666][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_192500.pt (epoch 2 @ 192500 updates, score 88.21) (writing took 5.417039063991979 seconds) [2024-07-12 08:54:39,648][train_inner][INFO] - {"epoch": 2, "update": 1.277, "loss": "0.872", "ntokens": "127.58", "acc_total": "127.58", "n_correct": "110.545", "wer_total": "127.58", "n_error": "17.025", "ppl": "1.83", "accuracy": "86.648", "wer": "13.345", "wps": "8.9", "ups": "0.07", "wpb": "127.6", "bsz": "8", "num_updates": "192600", "lr": "0.000124909", "gnorm": "2.773", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "547824"} [2024-07-12 09:00:44,187][train_inner][INFO] - {"epoch": 2, "update": 1.279, "loss": "0.821", "ntokens": "126.535", "acc_total": "126.535", "n_correct": "110.68", "wer_total": "126.535", "n_error": "15.84", "ppl": "1.77", "accuracy": "87.47", "wer": "12.518", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "192800", "lr": "0.000124535", "gnorm": "2.538", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "548188"} [2024-07-12 09:06:48,775][train_inner][INFO] - {"epoch": 2, "update": 1.28, "loss": "0.88", "ntokens": "127.875", "acc_total": "127.875", "n_correct": "110.67", "wer_total": "127.875", "n_error": "17.2", "ppl": "1.84", "accuracy": "86.545", "wer": "13.451", "wps": "70.1", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "193000", "lr": "0.000124163", "gnorm": "2.577", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "548553"} [2024-07-12 09:12:53,317][train_inner][INFO] - {"epoch": 2, "update": 1.281, "loss": "0.829", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "110.99", "wer_total": "127.03", "n_error": "16.035", "ppl": "1.78", "accuracy": "87.373", "wer": "12.623", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "193200", "lr": "0.000123791", "gnorm": "2.504", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "548917"} [2024-07-12 09:18:57,941][train_inner][INFO] - {"epoch": 2, "update": 1.283, "loss": "0.859", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "110.715", "wer_total": "127.025", "n_error": "16.29", "ppl": "1.81", "accuracy": "87.16", "wer": "12.824", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "193400", "lr": "0.000123421", "gnorm": "2.596", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "549282"} [2024-07-12 09:25:02,428][train_inner][INFO] - {"epoch": 2, "update": 1.284, "loss": "0.838", "ntokens": "126.215", "acc_total": "126.215", "n_correct": "110.335", "wer_total": "126.215", "n_error": "15.88", "ppl": "1.79", "accuracy": "87.418", "wer": "12.582", "wps": "69.3", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "193600", "lr": "0.000123052", "gnorm": "2.311", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "549646"} [2024-07-12 09:31:07,276][train_inner][INFO] - {"epoch": 2, "update": 1.285, "loss": "0.837", "ntokens": "128.155", "acc_total": "128.155", "n_correct": "111.92", "wer_total": "128.155", "n_error": "16.235", "ppl": "1.79", "accuracy": "87.332", "wer": "12.668", "wps": "70.3", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "193800", "lr": "0.000122684", "gnorm": "2.395", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "550011"} [2024-07-12 09:37:11,682][train_inner][INFO] - {"epoch": 2, "update": 1.287, "loss": "0.831", "ntokens": "126.18", "acc_total": "126.18", "n_correct": "110.265", "wer_total": "126.18", "n_error": "15.915", "ppl": "1.78", "accuracy": "87.387", "wer": "12.613", "wps": "69.3", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "194000", "lr": "0.000122317", "gnorm": "2.407", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "550376"} [2024-07-12 09:43:16,256][train_inner][INFO] - {"epoch": 2, "update": 1.288, "loss": "0.876", "ntokens": "126.825", "acc_total": "126.825", "n_correct": "110.53", "wer_total": "126.825", "n_error": "16.29", "ppl": "1.83", "accuracy": "87.152", "wer": "12.844", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "194200", "lr": "0.000121951", "gnorm": "2.424", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "550740"} [2024-07-12 09:49:21,073][train_inner][INFO] - {"epoch": 2, "update": 1.289, "loss": "0.873", "ntokens": "127.61", "acc_total": "127.61", "n_correct": "111.015", "wer_total": "127.61", "n_error": "16.595", "ppl": "1.83", "accuracy": "86.996", "wer": "13.004", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "194400", "lr": "0.000121586", "gnorm": "2.385", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "551105"} [2024-07-12 09:55:25,881][train_inner][INFO] - {"epoch": 2, "update": 1.29, "loss": "0.849", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "110.24", "wer_total": "126.76", "n_error": "16.52", "ppl": "1.8", "accuracy": "86.967", "wer": "13.033", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "194600", "lr": "0.000121222", "gnorm": "2.496", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "551470"} [2024-07-12 10:01:30,511][train_inner][INFO] - {"epoch": 2, "update": 1.292, "loss": "0.859", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "110.405", "wer_total": "126.675", "n_error": "16.265", "ppl": "1.81", "accuracy": "87.156", "wer": "12.84", "wps": "69.5", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "194800", "lr": "0.00012086", "gnorm": "2.458", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "551835"} [2024-07-12 10:07:35,006][train_inner][INFO] - {"epoch": 2, "update": 1.293, "loss": "0.864", "ntokens": "125.925", "acc_total": "125.925", "n_correct": "110.105", "wer_total": "125.925", "n_error": "15.815", "ppl": "1.82", "accuracy": "87.437", "wer": "12.559", "wps": "69.1", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "195000", "lr": "0.000120498", "gnorm": "2.421", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "552199"} [2024-07-12 10:07:35,007][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 10:49:23,049][valid][INFO] - {"epoch": 2, "valid_loss": "0.725", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.0681", "valid_wer_total": "18.1585", "valid_n_error": "2.08995", "valid_ppl": "1.65", "valid_accuracy": "88.488", "valid_wer": "11.51", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "195000", "valid_best_accuracy": "88.488"} [2024-07-12 10:49:23,050][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 195000 updates [2024-07-12 10:49:23,050][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_195000.pt [2024-07-12 10:49:26,243][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_195000.pt [2024-07-12 10:49:30,373][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_195000.pt (epoch 2 @ 195000 updates, score 88.488) (writing took 7.322717025061138 seconds) [2024-07-12 10:55:34,974][train_inner][INFO] - {"epoch": 2, "update": 1.294, "loss": "0.833", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "110.875", "wer_total": "126.82", "n_error": "15.945", "ppl": "1.78", "accuracy": "87.427", "wer": "12.573", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "195200", "lr": "0.000120138", "gnorm": "2.421", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "555079"} [2024-07-12 10:58:32,035][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-12 11:01:41,559][train_inner][INFO] - {"epoch": 2, "update": 1.296, "loss": "0.835", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "111.385", "wer_total": "127.465", "n_error": "16.075", "ppl": "1.78", "accuracy": "87.385", "wer": "12.611", "wps": "69.5", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "195400", "lr": "0.000119778", "gnorm": "2.619", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "555446"} [2024-07-12 11:07:46,281][train_inner][INFO] - {"epoch": 2, "update": 1.297, "loss": "0.826", "ntokens": "126.63", "acc_total": "126.63", "n_correct": "110.81", "wer_total": "126.63", "n_error": "15.82", "ppl": "1.77", "accuracy": "87.507", "wer": "12.493", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "195600", "lr": "0.00011942", "gnorm": "2.444", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "555810"} [2024-07-12 11:13:50,786][train_inner][INFO] - {"epoch": 2, "update": 1.298, "loss": "0.832", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "110.96", "wer_total": "127.025", "n_error": "16.065", "ppl": "1.78", "accuracy": "87.353", "wer": "12.647", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "195800", "lr": "0.000119063", "gnorm": "2.434", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "556175"} [2024-07-12 11:19:55,580][train_inner][INFO] - {"epoch": 2, "update": 1.3, "loss": "0.86", "ntokens": "127.04", "acc_total": "127.04", "n_correct": "110.905", "wer_total": "127.04", "n_error": "16.135", "ppl": "1.81", "accuracy": "87.299", "wer": "12.701", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "196000", "lr": "0.000118707", "gnorm": "2.694", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "556540"} [2024-07-12 11:26:00,412][train_inner][INFO] - {"epoch": 2, "update": 1.301, "loss": "0.832", "ntokens": "127.38", "acc_total": "127.38", "n_correct": "111.18", "wer_total": "127.38", "n_error": "16.2", "ppl": "1.78", "accuracy": "87.282", "wer": "12.718", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "196200", "lr": "0.000118352", "gnorm": "2.412", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "556904"} [2024-07-12 11:32:05,327][train_inner][INFO] - {"epoch": 2, "update": 1.302, "loss": "0.807", "ntokens": "126.55", "acc_total": "126.55", "n_correct": "110.505", "wer_total": "126.55", "n_error": "16.045", "ppl": "1.75", "accuracy": "87.321", "wer": "12.679", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "196400", "lr": "0.000117998", "gnorm": "2.448", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "557269"} [2024-07-12 11:38:10,069][train_inner][INFO] - {"epoch": 2, "update": 1.304, "loss": "0.837", "ntokens": "125.74", "acc_total": "125.74", "n_correct": "109.9", "wer_total": "125.74", "n_error": "15.84", "ppl": "1.79", "accuracy": "87.403", "wer": "12.597", "wps": "68.9", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "196600", "lr": "0.000117645", "gnorm": "2.44", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "557634"} [2024-07-12 11:44:14,920][train_inner][INFO] - {"epoch": 2, "update": 1.305, "loss": "0.838", "ntokens": "126.91", "acc_total": "126.91", "n_correct": "110.765", "wer_total": "126.91", "n_error": "16.13", "ppl": "1.79", "accuracy": "87.278", "wer": "12.71", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "196800", "lr": "0.000117293", "gnorm": "2.578", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "557999"} [2024-07-12 11:50:19,967][train_inner][INFO] - {"epoch": 2, "update": 1.306, "loss": "0.842", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "110.8", "wer_total": "127.155", "n_error": "16.355", "ppl": "1.79", "accuracy": "87.138", "wer": "12.862", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "197000", "lr": "0.000116942", "gnorm": "2.487", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "558364"} [2024-07-12 11:56:24,652][train_inner][INFO] - {"epoch": 2, "update": 1.308, "loss": "0.858", "ntokens": "125.57", "acc_total": "125.57", "n_correct": "109.485", "wer_total": "125.57", "n_error": "16.085", "ppl": "1.81", "accuracy": "87.19", "wer": "12.81", "wps": "68.9", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "197200", "lr": "0.000116592", "gnorm": "2.494", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "558729"} [2024-07-12 12:02:29,575][train_inner][INFO] - {"epoch": 2, "update": 1.309, "loss": "0.823", "ntokens": "126.88", "acc_total": "126.88", "n_correct": "111.12", "wer_total": "126.88", "n_error": "15.755", "ppl": "1.77", "accuracy": "87.579", "wer": "12.417", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "197400", "lr": "0.000116243", "gnorm": "2.557", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "559094"} [2024-07-12 12:05:31,984][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 12:47:20,457][valid][INFO] - {"epoch": 2, "valid_loss": "0.725", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.0804", "valid_wer_total": "18.1585", "valid_n_error": "2.07778", "valid_ppl": "1.65", "valid_accuracy": "88.556", "valid_wer": "11.442", "valid_wps": "173.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "197500", "valid_best_accuracy": "88.556"} [2024-07-12 12:47:20,458][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 197500 updates [2024-07-12 12:47:20,458][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_197500.pt [2024-07-12 12:47:23,692][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_197500.pt [2024-07-12 12:47:27,938][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_197500.pt (epoch 2 @ 197500 updates, score 88.556) (writing took 7.479788611992262 seconds) [2024-07-12 12:50:30,293][train_inner][INFO] - {"epoch": 2, "update": 1.31, "loss": "0.822", "ntokens": "126.435", "acc_total": "126.435", "n_correct": "110.6", "wer_total": "126.435", "n_error": "15.835", "ppl": "1.77", "accuracy": "87.476", "wer": "12.524", "wps": "8.8", "ups": "0.07", "wpb": "126.4", "bsz": "8", "num_updates": "197600", "lr": "0.000115896", "gnorm": "2.419", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "561974"} [2024-07-12 12:56:24,573][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-12 12:56:37,343][train_inner][INFO] - {"epoch": 2, "update": 1.312, "loss": "0.856", "ntokens": "127.65", "acc_total": "127.65", "n_correct": "110.915", "wer_total": "127.65", "n_error": "16.73", "ppl": "1.81", "accuracy": "86.89", "wer": "13.106", "wps": "69.6", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "197800", "lr": "0.000115549", "gnorm": "2.484", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "562341"} [2024-07-12 13:02:42,503][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "0.832", "ntokens": "126.94", "acc_total": "126.94", "n_correct": "111.105", "wer_total": "126.94", "n_error": "15.835", "ppl": "1.78", "accuracy": "87.526", "wer": "12.474", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "198000", "lr": "0.000115203", "gnorm": "2.507", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "562707"} [2024-07-12 13:08:47,692][train_inner][INFO] - {"epoch": 2, "update": 1.314, "loss": "0.809", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "110.26", "wer_total": "125.995", "n_error": "15.725", "ppl": "1.75", "accuracy": "87.511", "wer": "12.481", "wps": "69", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "198200", "lr": "0.000114859", "gnorm": "2.537", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "563072"} [2024-07-12 13:14:52,700][train_inner][INFO] - {"epoch": 2, "update": 1.316, "loss": "0.834", "ntokens": "127.48", "acc_total": "127.48", "n_correct": "111.445", "wer_total": "127.48", "n_error": "16.03", "ppl": "1.78", "accuracy": "87.422", "wer": "12.575", "wps": "69.9", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "198400", "lr": "0.000114515", "gnorm": "2.451", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "563437"} [2024-07-12 13:18:15,487][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-12 13:20:59,651][train_inner][INFO] - {"epoch": 2, "update": 1.317, "loss": "0.83", "ntokens": "125.905", "acc_total": "125.905", "n_correct": "110.155", "wer_total": "125.905", "n_error": "15.75", "ppl": "1.78", "accuracy": "87.491", "wer": "12.509", "wps": "68.6", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "198600", "lr": "0.000114173", "gnorm": "2.432", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "563804"} [2024-07-12 13:27:04,444][train_inner][INFO] - {"epoch": 2, "update": 1.318, "loss": "0.862", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "110.43", "wer_total": "126.68", "n_error": "16.25", "ppl": "1.82", "accuracy": "87.172", "wer": "12.828", "wps": "69.5", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "198800", "lr": "0.000113831", "gnorm": "2.5", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "564168"} [2024-07-12 13:33:09,307][train_inner][INFO] - {"epoch": 2, "update": 1.32, "loss": "0.825", "ntokens": "127.475", "acc_total": "127.475", "n_correct": "111.46", "wer_total": "127.475", "n_error": "16.005", "ppl": "1.77", "accuracy": "87.437", "wer": "12.555", "wps": "69.9", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "199000", "lr": "0.000113491", "gnorm": "2.394", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "564533"} [2024-07-12 13:39:14,219][train_inner][INFO] - {"epoch": 2, "update": 1.321, "loss": "0.834", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "110.825", "wer_total": "127.075", "n_error": "16.245", "ppl": "1.78", "accuracy": "87.212", "wer": "12.784", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "199200", "lr": "0.000113151", "gnorm": "2.659", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "564898"} [2024-07-12 13:45:18,894][train_inner][INFO] - {"epoch": 2, "update": 1.322, "loss": "0.848", "ntokens": "126.505", "acc_total": "126.505", "n_correct": "110.585", "wer_total": "126.505", "n_error": "15.915", "ppl": "1.8", "accuracy": "87.416", "wer": "12.581", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "199400", "lr": "0.000112813", "gnorm": "2.479", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "565263"} [2024-07-12 13:51:23,949][train_inner][INFO] - {"epoch": 2, "update": 1.324, "loss": "0.8", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "111.38", "wer_total": "126.795", "n_error": "15.415", "ppl": "1.74", "accuracy": "87.843", "wer": "12.157", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "199600", "lr": "0.000112475", "gnorm": "2.469", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "565628"} [2024-07-12 13:57:29,105][train_inner][INFO] - {"epoch": 2, "update": 1.325, "loss": "0.841", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "110.9", "wer_total": "127.025", "n_error": "16.12", "ppl": "1.79", "accuracy": "87.306", "wer": "12.69", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "199800", "lr": "0.000112139", "gnorm": "2.347", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "565993"} [2024-07-12 14:03:34,219][train_inner][INFO] - {"epoch": 2, "update": 1.326, "loss": "0.804", "ntokens": "125.99", "acc_total": "125.99", "n_correct": "110.58", "wer_total": "125.99", "n_error": "15.41", "ppl": "1.75", "accuracy": "87.769", "wer": "12.231", "wps": "69", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "200000", "lr": "0.000111803", "gnorm": "2.515", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "566358"} [2024-07-12 14:03:34,220][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 14:45:25,272][valid][INFO] - {"epoch": 2, "valid_loss": "0.721", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.072", "valid_wer_total": "18.1585", "valid_n_error": "2.0862", "valid_ppl": "1.65", "valid_accuracy": "88.51", "valid_wer": "11.489", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "200000", "valid_best_accuracy": "88.556"} [2024-07-12 14:45:25,273][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 200000 updates [2024-07-12 14:45:25,273][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_200000.pt [2024-07-12 14:45:28,494][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_200000.pt [2024-07-12 14:45:30,633][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_200000.pt (epoch 2 @ 200000 updates, score 88.51) (writing took 5.359368551988155 seconds) [2024-07-12 14:51:35,532][train_inner][INFO] - {"epoch": 2, "update": 1.328, "loss": "0.796", "ntokens": "127.76", "acc_total": "127.76", "n_correct": "111.885", "wer_total": "127.76", "n_error": "15.86", "ppl": "1.74", "accuracy": "87.574", "wer": "12.414", "wps": "8.9", "ups": "0.07", "wpb": "127.8", "bsz": "8", "num_updates": "200200", "lr": "0.000111469", "gnorm": "2.213", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "569240"} [2024-07-12 14:57:40,633][train_inner][INFO] - {"epoch": 2, "update": 1.329, "loss": "0.833", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "110.87", "wer_total": "126.725", "n_error": "15.845", "ppl": "1.78", "accuracy": "87.489", "wer": "12.503", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "200400", "lr": "0.000111136", "gnorm": "2.359", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "569605"} [2024-07-12 15:03:45,603][train_inner][INFO] - {"epoch": 2, "update": 1.33, "loss": "0.834", "ntokens": "126.74", "acc_total": "126.74", "n_correct": "110.42", "wer_total": "126.74", "n_error": "16.32", "ppl": "1.78", "accuracy": "87.123", "wer": "12.877", "wps": "69.5", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "200600", "lr": "0.000110803", "gnorm": "2.385", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "569970"} [2024-07-12 15:09:50,780][train_inner][INFO] - {"epoch": 2, "update": 1.332, "loss": "0.799", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "112.035", "wer_total": "127.515", "n_error": "15.48", "ppl": "1.74", "accuracy": "87.86", "wer": "12.14", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "200800", "lr": "0.000110472", "gnorm": "2.406", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "570335"} [2024-07-12 15:15:56,047][train_inner][INFO] - {"epoch": 2, "update": 1.333, "loss": "0.858", "ntokens": "127.845", "acc_total": "127.845", "n_correct": "111.81", "wer_total": "127.845", "n_error": "16.035", "ppl": "1.81", "accuracy": "87.457", "wer": "12.543", "wps": "70", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "201000", "lr": "0.000110141", "gnorm": "2.53", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "570700"} [2024-07-12 15:22:01,168][train_inner][INFO] - {"epoch": 2, "update": 1.334, "loss": "0.82", "ntokens": "127.855", "acc_total": "127.855", "n_correct": "111.86", "wer_total": "127.855", "n_error": "15.995", "ppl": "1.77", "accuracy": "87.49", "wer": "12.51", "wps": "70", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "201200", "lr": "0.000109812", "gnorm": "2.447", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "571065"} [2024-07-12 15:28:06,255][train_inner][INFO] - {"epoch": 2, "update": 1.336, "loss": "0.81", "ntokens": "126.585", "acc_total": "126.585", "n_correct": "111.18", "wer_total": "126.585", "n_error": "15.405", "ppl": "1.75", "accuracy": "87.83", "wer": "12.17", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "201400", "lr": "0.000109483", "gnorm": "2.381", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "571430"} [2024-07-12 15:34:11,356][train_inner][INFO] - {"epoch": 2, "update": 1.337, "loss": "0.803", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "110.55", "wer_total": "126.01", "n_error": "15.45", "ppl": "1.74", "accuracy": "87.731", "wer": "12.261", "wps": "69", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "201600", "lr": "0.000109156", "gnorm": "2.329", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "571795"} [2024-07-12 15:40:16,497][train_inner][INFO] - {"epoch": 2, "update": 1.338, "loss": "0.799", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "111.825", "wer_total": "127.1", "n_error": "15.275", "ppl": "1.74", "accuracy": "87.982", "wer": "12.018", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "201800", "lr": "0.000108829", "gnorm": "2.348", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "572161"} [2024-07-12 15:45:41,421][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-12 15:46:23,562][train_inner][INFO] - {"epoch": 2, "update": 1.34, "loss": "0.799", "ntokens": "126.745", "acc_total": "126.745", "n_correct": "111.35", "wer_total": "126.745", "n_error": "15.395", "ppl": "1.74", "accuracy": "87.854", "wer": "12.146", "wps": "69.1", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "202000", "lr": "0.000108504", "gnorm": "2.29", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "572528"} [2024-07-12 15:52:28,506][train_inner][INFO] - {"epoch": 2, "update": 1.341, "loss": "0.809", "ntokens": "126.14", "acc_total": "126.14", "n_correct": "110.765", "wer_total": "126.14", "n_error": "15.375", "ppl": "1.75", "accuracy": "87.811", "wer": "12.189", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "202200", "lr": "0.000108179", "gnorm": "2.421", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "572893"} [2024-07-12 15:58:33,673][train_inner][INFO] - {"epoch": 2, "update": 1.342, "loss": "0.827", "ntokens": "126.835", "acc_total": "126.835", "n_correct": "111.275", "wer_total": "126.835", "n_error": "15.56", "ppl": "1.77", "accuracy": "87.732", "wer": "12.268", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "202400", "lr": "0.000107856", "gnorm": "2.335", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "573258"} [2024-07-12 16:01:36,234][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 16:43:27,619][valid][INFO] - {"epoch": 2, "valid_loss": "0.71", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1031", "valid_wer_total": "18.1585", "valid_n_error": "2.05502", "valid_ppl": "1.64", "valid_accuracy": "88.681", "valid_wer": "11.317", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "202500", "valid_best_accuracy": "88.681"} [2024-07-12 16:43:27,620][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 202500 updates [2024-07-12 16:43:27,620][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_202500.pt [2024-07-12 16:43:30,864][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_202500.pt [2024-07-12 16:43:35,172][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_202500.pt (epoch 2 @ 202500 updates, score 88.681) (writing took 7.5522837589960545 seconds) [2024-07-12 16:46:37,561][train_inner][INFO] - {"epoch": 2, "update": 1.344, "loss": "0.805", "ntokens": "127.61", "acc_total": "127.61", "n_correct": "112.145", "wer_total": "127.61", "n_error": "15.465", "ppl": "1.75", "accuracy": "87.881", "wer": "12.119", "wps": "8.8", "ups": "0.07", "wpb": "127.6", "bsz": "8", "num_updates": "202600", "lr": "0.000107533", "gnorm": "2.294", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "576142"} [2024-07-12 16:52:42,763][train_inner][INFO] - {"epoch": 2, "update": 1.345, "loss": "0.808", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "111.84", "wer_total": "127.385", "n_error": "15.53", "ppl": "1.75", "accuracy": "87.797", "wer": "12.191", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "202800", "lr": "0.000107211", "gnorm": "2.369", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "576507"} [2024-07-12 16:58:47,604][train_inner][INFO] - {"epoch": 2, "update": 1.346, "loss": "0.775", "ntokens": "127.935", "acc_total": "127.935", "n_correct": "113.055", "wer_total": "127.935", "n_error": "14.875", "ppl": "1.71", "accuracy": "88.369", "wer": "11.627", "wps": "70.1", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "203000", "lr": "0.000106891", "gnorm": "2.43", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "576872"} [2024-07-12 17:04:52,777][train_inner][INFO] - {"epoch": 2, "update": 1.348, "loss": "0.842", "ntokens": "126.615", "acc_total": "126.615", "n_correct": "110.7", "wer_total": "126.615", "n_error": "15.91", "ppl": "1.79", "accuracy": "87.43", "wer": "12.566", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "203200", "lr": "0.000106571", "gnorm": "2.448", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "577237"} [2024-07-12 17:10:58,043][train_inner][INFO] - {"epoch": 2, "update": 1.349, "loss": "0.814", "ntokens": "127.505", "acc_total": "127.505", "n_correct": "111.71", "wer_total": "127.505", "n_error": "15.785", "ppl": "1.76", "accuracy": "87.612", "wer": "12.38", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "203400", "lr": "0.000106252", "gnorm": "2.441", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "577602"} [2024-07-12 17:17:03,232][train_inner][INFO] - {"epoch": 2, "update": 1.35, "loss": "0.821", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "110.42", "wer_total": "126.325", "n_error": "15.9", "ppl": "1.77", "accuracy": "87.409", "wer": "12.587", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "203600", "lr": "0.000105934", "gnorm": "2.498", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "577967"} [2024-07-12 17:23:08,466][train_inner][INFO] - {"epoch": 2, "update": 1.352, "loss": "0.825", "ntokens": "126.43", "acc_total": "126.43", "n_correct": "110.735", "wer_total": "126.43", "n_error": "15.695", "ppl": "1.77", "accuracy": "87.586", "wer": "12.414", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "203800", "lr": "0.000105617", "gnorm": "2.454", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "578333"} [2024-07-12 17:29:13,627][train_inner][INFO] - {"epoch": 2, "update": 1.353, "loss": "0.79", "ntokens": "127.565", "acc_total": "127.565", "n_correct": "112.045", "wer_total": "127.565", "n_error": "15.515", "ppl": "1.73", "accuracy": "87.834", "wer": "12.162", "wps": "69.9", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "204000", "lr": "0.000105301", "gnorm": "2.417", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "578698"} [2024-07-12 17:35:18,581][train_inner][INFO] - {"epoch": 2, "update": 1.354, "loss": "0.818", "ntokens": "126.72", "acc_total": "126.72", "n_correct": "111.3", "wer_total": "126.72", "n_error": "15.41", "ppl": "1.76", "accuracy": "87.831", "wer": "12.161", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "204200", "lr": "0.000104986", "gnorm": "2.448", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "579063"} [2024-07-12 17:41:23,680][train_inner][INFO] - {"epoch": 2, "update": 1.355, "loss": "0.816", "ntokens": "126.66", "acc_total": "126.66", "n_correct": "110.785", "wer_total": "126.66", "n_error": "15.875", "ppl": "1.76", "accuracy": "87.466", "wer": "12.534", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "204400", "lr": "0.000104672", "gnorm": "2.342", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "579428"} [2024-07-12 17:47:28,805][train_inner][INFO] - {"epoch": 2, "update": 1.357, "loss": "0.809", "ntokens": "126.84", "acc_total": "126.84", "n_correct": "111.34", "wer_total": "126.84", "n_error": "15.49", "ppl": "1.75", "accuracy": "87.78", "wer": "12.212", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "204600", "lr": "0.000104359", "gnorm": "2.451", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "579793"} [2024-07-12 17:53:34,217][train_inner][INFO] - {"epoch": 2, "update": 1.358, "loss": "0.827", "ntokens": "127.02", "acc_total": "127.02", "n_correct": "111.27", "wer_total": "127.02", "n_error": "15.74", "ppl": "1.77", "accuracy": "87.6", "wer": "12.392", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "204800", "lr": "0.000104047", "gnorm": "2.542", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "580158"} [2024-07-12 17:59:39,619][train_inner][INFO] - {"epoch": 2, "update": 1.359, "loss": "0.797", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "112.025", "wer_total": "127.385", "n_error": "15.36", "ppl": "1.74", "accuracy": "87.942", "wer": "12.058", "wps": "69.7", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "205000", "lr": "0.000103736", "gnorm": "2.469", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "580524"} [2024-07-12 17:59:39,620][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 18:41:31,134][valid][INFO] - {"epoch": 2, "valid_loss": "0.718", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.0762", "valid_wer_total": "18.1585", "valid_n_error": "2.08174", "valid_ppl": "1.64", "valid_accuracy": "88.533", "valid_wer": "11.464", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "205000", "valid_best_accuracy": "88.681"} [2024-07-12 18:41:31,135][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 205000 updates [2024-07-12 18:41:31,135][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_205000.pt [2024-07-12 18:41:34,342][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_205000.pt [2024-07-12 18:41:36,528][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_205000.pt (epoch 2 @ 205000 updates, score 88.533) (writing took 5.392868432914838 seconds) [2024-07-12 18:47:41,297][train_inner][INFO] - {"epoch": 2, "update": 1.361, "loss": "0.799", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "111.015", "wer_total": "126.46", "n_error": "15.445", "ppl": "1.74", "accuracy": "87.787", "wer": "12.213", "wps": "8.8", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "205200", "lr": "0.000103426", "gnorm": "2.478", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "583405"} [2024-07-12 18:52:22,445][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-12 18:53:48,168][train_inner][INFO] - {"epoch": 2, "update": 1.362, "loss": "0.784", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "111.765", "wer_total": "127.235", "n_error": "15.465", "ppl": "1.72", "accuracy": "87.841", "wer": "12.155", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "205400", "lr": "0.000103116", "gnorm": "2.407", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "583772"} [2024-07-12 18:59:53,295][train_inner][INFO] - {"epoch": 2, "update": 1.363, "loss": "0.789", "ntokens": "127.285", "acc_total": "127.285", "n_correct": "111.88", "wer_total": "127.285", "n_error": "15.4", "ppl": "1.73", "accuracy": "87.897", "wer": "12.099", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "205600", "lr": "0.000102808", "gnorm": "2.383", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "584137"} [2024-07-12 19:05:58,585][train_inner][INFO] - {"epoch": 2, "update": 1.365, "loss": "0.794", "ntokens": "127.12", "acc_total": "127.12", "n_correct": "111.88", "wer_total": "127.12", "n_error": "15.24", "ppl": "1.73", "accuracy": "88.011", "wer": "11.989", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "205800", "lr": "0.0001025", "gnorm": "2.51", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "584503"} [2024-07-12 19:12:04,289][train_inner][INFO] - {"epoch": 2, "update": 1.366, "loss": "0.802", "ntokens": "128.26", "acc_total": "128.26", "n_correct": "112.89", "wer_total": "128.26", "n_error": "15.365", "ppl": "1.74", "accuracy": "88.017", "wer": "11.98", "wps": "70.1", "ups": "0.55", "wpb": "128.3", "bsz": "8", "num_updates": "206000", "lr": "0.000102194", "gnorm": "2.531", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "584868"} [2024-07-12 19:18:09,641][train_inner][INFO] - {"epoch": 2, "update": 1.367, "loss": "0.789", "ntokens": "126.895", "acc_total": "126.895", "n_correct": "111.61", "wer_total": "126.895", "n_error": "15.275", "ppl": "1.73", "accuracy": "87.955", "wer": "12.038", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "206200", "lr": "0.000101888", "gnorm": "2.405", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "585234"} [2024-07-12 19:24:15,185][train_inner][INFO] - {"epoch": 2, "update": 1.369, "loss": "0.772", "ntokens": "127.28", "acc_total": "127.28", "n_correct": "112.065", "wer_total": "127.28", "n_error": "15.215", "ppl": "1.71", "accuracy": "88.046", "wer": "11.954", "wps": "69.6", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "206400", "lr": "0.000101583", "gnorm": "2.276", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "585599"} [2024-07-12 19:30:20,711][train_inner][INFO] - {"epoch": 2, "update": 1.37, "loss": "0.861", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "110.475", "wer_total": "127.075", "n_error": "16.6", "ppl": "1.82", "accuracy": "86.937", "wer": "13.063", "wps": "69.5", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "206600", "lr": "0.000101279", "gnorm": "2.731", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "585965"} [2024-07-12 19:36:26,615][train_inner][INFO] - {"epoch": 2, "update": 1.371, "loss": "0.81", "ntokens": "127.365", "acc_total": "127.365", "n_correct": "111.54", "wer_total": "127.365", "n_error": "15.825", "ppl": "1.75", "accuracy": "87.575", "wer": "12.425", "wps": "69.6", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "206800", "lr": "0.000100976", "gnorm": "2.621", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "586331"} [2024-07-12 19:42:32,409][train_inner][INFO] - {"epoch": 2, "update": 1.373, "loss": "0.826", "ntokens": "126.845", "acc_total": "126.845", "n_correct": "110.88", "wer_total": "126.845", "n_error": "15.96", "ppl": "1.77", "accuracy": "87.414", "wer": "12.582", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "207000", "lr": "0.000100674", "gnorm": "2.475", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "586696"} [2024-07-12 19:48:37,977][train_inner][INFO] - {"epoch": 2, "update": 1.374, "loss": "0.833", "ntokens": "127.355", "acc_total": "127.355", "n_correct": "111.805", "wer_total": "127.355", "n_error": "15.55", "ppl": "1.78", "accuracy": "87.79", "wer": "12.21", "wps": "69.7", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "207200", "lr": "0.000100373", "gnorm": "2.429", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "587062"} [2024-07-12 19:54:43,584][train_inner][INFO] - {"epoch": 2, "update": 1.375, "loss": "0.811", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "111.3", "wer_total": "127.155", "n_error": "15.845", "ppl": "1.75", "accuracy": "87.531", "wer": "12.461", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "207400", "lr": "0.000100073", "gnorm": "2.418", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "587428"} [2024-07-12 19:57:46,292][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 20:39:39,344][valid][INFO] - {"epoch": 2, "valid_loss": "0.703", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1168", "valid_wer_total": "18.1585", "valid_n_error": "2.04135", "valid_ppl": "1.63", "valid_accuracy": "88.757", "valid_wer": "11.242", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "207500", "valid_best_accuracy": "88.757"} [2024-07-12 20:39:39,345][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 207500 updates [2024-07-12 20:39:39,345][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_207500.pt [2024-07-12 20:39:42,584][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_207500.pt [2024-07-12 20:39:46,720][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_207500.pt (epoch 2 @ 207500 updates, score 88.757) (writing took 7.374980028020218 seconds) [2024-07-12 20:42:49,126][train_inner][INFO] - {"epoch": 2, "update": 1.377, "loss": "0.773", "ntokens": "126.685", "acc_total": "126.685", "n_correct": "111.98", "wer_total": "126.685", "n_error": "14.705", "ppl": "1.71", "accuracy": "88.392", "wer": "11.608", "wps": "8.8", "ups": "0.07", "wpb": "126.7", "bsz": "8", "num_updates": "207600", "lr": "9.97737e-05", "gnorm": "2.207", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "590313"} [2024-07-12 20:48:54,619][train_inner][INFO] - {"epoch": 2, "update": 1.378, "loss": "0.84", "ntokens": "128.24", "acc_total": "128.24", "n_correct": "112.215", "wer_total": "128.24", "n_error": "16.02", "ppl": "1.79", "accuracy": "87.504", "wer": "12.492", "wps": "70.2", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "207800", "lr": "9.94752e-05", "gnorm": "2.523", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "590679"} [2024-07-12 20:54:59,890][train_inner][INFO] - {"epoch": 2, "update": 1.379, "loss": "0.834", "ntokens": "126.57", "acc_total": "126.57", "n_correct": "110.76", "wer_total": "126.57", "n_error": "15.81", "ppl": "1.78", "accuracy": "87.509", "wer": "12.491", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "208000", "lr": "9.91776e-05", "gnorm": "2.526", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "591044"} [2024-07-12 21:01:05,596][train_inner][INFO] - {"epoch": 2, "update": 1.381, "loss": "0.809", "ntokens": "127.81", "acc_total": "127.81", "n_correct": "112.32", "wer_total": "127.81", "n_error": "15.49", "ppl": "1.75", "accuracy": "87.88", "wer": "12.12", "wps": "69.9", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "208200", "lr": "9.8881e-05", "gnorm": "2.429", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "591410"} [2024-07-12 21:07:10,836][train_inner][INFO] - {"epoch": 2, "update": 1.382, "loss": "0.798", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "111.135", "wer_total": "126.815", "n_error": "15.675", "ppl": "1.74", "accuracy": "87.636", "wer": "12.361", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "208400", "lr": "9.85852e-05", "gnorm": "2.357", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "591775"} [2024-07-12 21:13:16,197][train_inner][INFO] - {"epoch": 2, "update": 1.383, "loss": "0.84", "ntokens": "126.345", "acc_total": "126.345", "n_correct": "110.28", "wer_total": "126.345", "n_error": "16.06", "ppl": "1.79", "accuracy": "87.285", "wer": "12.711", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "208600", "lr": "9.82903e-05", "gnorm": "2.47", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "592140"} [2024-07-12 21:19:21,514][train_inner][INFO] - {"epoch": 2, "update": 1.385, "loss": "0.811", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "111.335", "wer_total": "127.135", "n_error": "15.8", "ppl": "1.75", "accuracy": "87.572", "wer": "12.428", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "208800", "lr": "9.79963e-05", "gnorm": "2.426", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "592506"} [2024-07-12 21:25:26,828][train_inner][INFO] - {"epoch": 2, "update": 1.386, "loss": "0.762", "ntokens": "126.49", "acc_total": "126.49", "n_correct": "111.785", "wer_total": "126.49", "n_error": "14.7", "ppl": "1.7", "accuracy": "88.375", "wer": "11.621", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "209000", "lr": "9.77032e-05", "gnorm": "2.291", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "592871"} [2024-07-12 21:31:32,131][train_inner][INFO] - {"epoch": 2, "update": 1.387, "loss": "0.789", "ntokens": "126.75", "acc_total": "126.75", "n_correct": "111.7", "wer_total": "126.75", "n_error": "15.05", "ppl": "1.73", "accuracy": "88.126", "wer": "11.874", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "209200", "lr": "9.74109e-05", "gnorm": "2.346", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "593236"} [2024-07-12 21:37:37,406][train_inner][INFO] - {"epoch": 2, "update": 1.389, "loss": "0.774", "ntokens": "127.5", "acc_total": "127.5", "n_correct": "112.385", "wer_total": "127.5", "n_error": "15.105", "ppl": "1.71", "accuracy": "88.145", "wer": "11.847", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "209400", "lr": "9.71195e-05", "gnorm": "2.387", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "593601"} [2024-07-12 21:43:42,801][train_inner][INFO] - {"epoch": 2, "update": 1.39, "loss": "0.784", "ntokens": "127.48", "acc_total": "127.48", "n_correct": "112.06", "wer_total": "127.48", "n_error": "15.415", "ppl": "1.72", "accuracy": "87.904", "wer": "12.092", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "209600", "lr": "9.6829e-05", "gnorm": "2.272", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "593967"} [2024-07-12 21:49:48,141][train_inner][INFO] - {"epoch": 2, "update": 1.391, "loss": "0.771", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "112.04", "wer_total": "127.485", "n_error": "15.445", "ppl": "1.71", "accuracy": "87.885", "wer": "12.115", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "209800", "lr": "9.65394e-05", "gnorm": "2.234", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "594332"} [2024-07-12 21:55:53,441][train_inner][INFO] - {"epoch": 2, "update": 1.393, "loss": "0.799", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "111.105", "wer_total": "126.605", "n_error": "15.495", "ppl": "1.74", "accuracy": "87.757", "wer": "12.239", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "210000", "lr": "9.62506e-05", "gnorm": "2.271", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "594697"} [2024-07-12 21:55:53,442][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-12 22:37:43,889][valid][INFO] - {"epoch": 2, "valid_loss": "0.698", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1394", "valid_wer_total": "18.1585", "valid_n_error": "2.01872", "valid_ppl": "1.62", "valid_accuracy": "88.881", "valid_wer": "11.117", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "210000", "valid_best_accuracy": "88.881"} [2024-07-12 22:37:43,890][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 210000 updates [2024-07-12 22:37:43,890][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_210000.pt [2024-07-12 22:37:47,224][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_210000.pt [2024-07-12 22:37:51,497][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_210000.pt (epoch 2 @ 210000 updates, score 88.881) (writing took 7.6077574549708515 seconds) [2024-07-12 22:41:02,709][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-12 22:43:58,024][train_inner][INFO] - {"epoch": 2, "update": 1.394, "loss": "0.8", "ntokens": "125.935", "acc_total": "125.935", "n_correct": "110.77", "wer_total": "125.935", "n_error": "15.165", "ppl": "1.74", "accuracy": "87.958", "wer": "12.042", "wps": "8.7", "ups": "0.07", "wpb": "125.9", "bsz": "8", "num_updates": "210200", "lr": "9.59627e-05", "gnorm": "2.41", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "597582"} [2024-07-12 22:50:03,252][train_inner][INFO] - {"epoch": 2, "update": 1.395, "loss": "0.8", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "112.255", "wer_total": "127.41", "n_error": "15.15", "ppl": "1.74", "accuracy": "88.105", "wer": "11.891", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "210400", "lr": "9.56757e-05", "gnorm": "2.369", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "597947"} [2024-07-12 22:56:08,413][train_inner][INFO] - {"epoch": 2, "update": 1.397, "loss": "0.834", "ntokens": "126.245", "acc_total": "126.245", "n_correct": "110.365", "wer_total": "126.245", "n_error": "15.88", "ppl": "1.78", "accuracy": "87.421", "wer": "12.579", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "210600", "lr": "9.53895e-05", "gnorm": "2.643", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "598312"} [2024-07-12 23:02:13,674][train_inner][INFO] - {"epoch": 2, "update": 1.398, "loss": "0.748", "ntokens": "126.225", "acc_total": "126.225", "n_correct": "111.175", "wer_total": "126.225", "n_error": "15.05", "ppl": "1.68", "accuracy": "88.077", "wer": "11.923", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "210800", "lr": "9.51041e-05", "gnorm": "2.308", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "598678"} [2024-07-12 23:07:24,011][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-12 23:08:20,779][train_inner][INFO] - {"epoch": 2, "update": 1.399, "loss": "0.803", "ntokens": "127.945", "acc_total": "127.945", "n_correct": "112.385", "wer_total": "127.945", "n_error": "15.56", "ppl": "1.74", "accuracy": "87.839", "wer": "12.161", "wps": "69.7", "ups": "0.54", "wpb": "127.9", "bsz": "8", "num_updates": "211000", "lr": "9.48197e-05", "gnorm": "2.323", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "599045"} [2024-07-12 23:14:26,067][train_inner][INFO] - {"epoch": 2, "update": 1.401, "loss": "0.798", "ntokens": "126.12", "acc_total": "126.12", "n_correct": "110.57", "wer_total": "126.12", "n_error": "15.545", "ppl": "1.74", "accuracy": "87.67", "wer": "12.326", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "211200", "lr": "9.4536e-05", "gnorm": "2.292", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "599410"} [2024-07-12 23:20:31,151][train_inner][INFO] - {"epoch": 2, "update": 1.402, "loss": "0.829", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "111.175", "wer_total": "127.33", "n_error": "16.15", "ppl": "1.78", "accuracy": "87.312", "wer": "12.684", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "211400", "lr": "9.42532e-05", "gnorm": "2.276", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "599775"} [2024-07-12 23:26:36,148][train_inner][INFO] - {"epoch": 2, "update": 1.403, "loss": "0.851", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "110.685", "wer_total": "126.945", "n_error": "16.26", "ppl": "1.8", "accuracy": "87.191", "wer": "12.809", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "211600", "lr": "9.39713e-05", "gnorm": "2.371", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "600140"} [2024-07-12 23:32:41,304][train_inner][INFO] - {"epoch": 2, "update": 1.405, "loss": "0.787", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "111.81", "wer_total": "126.975", "n_error": "15.165", "ppl": "1.73", "accuracy": "88.057", "wer": "11.943", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "211800", "lr": "9.36902e-05", "gnorm": "2.284", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "600505"} [2024-07-12 23:38:46,378][train_inner][INFO] - {"epoch": 2, "update": 1.406, "loss": "0.788", "ntokens": "127.195", "acc_total": "127.195", "n_correct": "111.69", "wer_total": "127.195", "n_error": "15.505", "ppl": "1.73", "accuracy": "87.81", "wer": "12.19", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "212000", "lr": "9.341e-05", "gnorm": "2.251", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "600870"} [2024-07-12 23:44:51,725][train_inner][INFO] - {"epoch": 2, "update": 1.407, "loss": "0.801", "ntokens": "127.215", "acc_total": "127.215", "n_correct": "111.485", "wer_total": "127.215", "n_error": "15.73", "ppl": "1.74", "accuracy": "87.635", "wer": "12.365", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "212200", "lr": "9.31306e-05", "gnorm": "2.189", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "601236"} [2024-07-12 23:50:57,016][train_inner][INFO] - {"epoch": 2, "update": 1.409, "loss": "0.791", "ntokens": "126.765", "acc_total": "126.765", "n_correct": "111.74", "wer_total": "126.765", "n_error": "15.025", "ppl": "1.73", "accuracy": "88.147", "wer": "11.853", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "212400", "lr": "9.2852e-05", "gnorm": "2.306", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "601601"} [2024-07-12 23:53:59,771][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 00:35:49,205][valid][INFO] - {"epoch": 2, "valid_loss": "0.693", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1475", "valid_wer_total": "18.1585", "valid_n_error": "2.01059", "valid_ppl": "1.62", "valid_accuracy": "88.925", "valid_wer": "11.072", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "212500", "valid_best_accuracy": "88.925"} [2024-07-13 00:35:49,205][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 212500 updates [2024-07-13 00:35:49,206][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_212500.pt [2024-07-13 00:35:52,440][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_212500.pt [2024-07-13 00:35:56,739][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_212500.pt (epoch 2 @ 212500 updates, score 88.925) (writing took 7.534117667004466 seconds) [2024-07-13 00:38:59,070][train_inner][INFO] - {"epoch": 2, "update": 1.41, "loss": "0.761", "ntokens": "126.795", "acc_total": "126.795", "n_correct": "112.155", "wer_total": "126.795", "n_error": "14.635", "ppl": "1.69", "accuracy": "88.454", "wer": "11.542", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "212600", "lr": "9.25742e-05", "gnorm": "2.42", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "604483"} [2024-07-13 00:45:04,484][train_inner][INFO] - {"epoch": 2, "update": 1.411, "loss": "0.794", "ntokens": "127.9", "acc_total": "127.9", "n_correct": "112.395", "wer_total": "127.9", "n_error": "15.505", "ppl": "1.73", "accuracy": "87.877", "wer": "12.123", "wps": "70", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "212800", "lr": "9.22973e-05", "gnorm": "2.302", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "604849"} [2024-07-13 00:51:09,917][train_inner][INFO] - {"epoch": 2, "update": 1.413, "loss": "0.759", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "112.765", "wer_total": "127.345", "n_error": "14.575", "ppl": "1.69", "accuracy": "88.551", "wer": "11.445", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "213000", "lr": "9.20212e-05", "gnorm": "2.477", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "605214"} [2024-07-13 00:57:15,352][train_inner][INFO] - {"epoch": 2, "update": 1.414, "loss": "0.783", "ntokens": "126.77", "acc_total": "126.77", "n_correct": "111.655", "wer_total": "126.77", "n_error": "15.115", "ppl": "1.72", "accuracy": "88.077", "wer": "11.923", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "213200", "lr": "9.1746e-05", "gnorm": "2.32", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "605579"} [2024-07-13 01:03:20,694][train_inner][INFO] - {"epoch": 2, "update": 1.415, "loss": "0.788", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "111.475", "wer_total": "126.555", "n_error": "15.075", "ppl": "1.73", "accuracy": "88.084", "wer": "11.912", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "213400", "lr": "9.14715e-05", "gnorm": "2.321", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "605945"} [2024-07-13 01:09:25,984][train_inner][INFO] - {"epoch": 2, "update": 1.416, "loss": "0.794", "ntokens": "126.985", "acc_total": "126.985", "n_correct": "111.615", "wer_total": "126.985", "n_error": "15.37", "ppl": "1.73", "accuracy": "87.896", "wer": "12.104", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "213600", "lr": "9.11979e-05", "gnorm": "2.353", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "606310"} [2024-07-13 01:15:31,336][train_inner][INFO] - {"epoch": 2, "update": 1.418, "loss": "0.796", "ntokens": "126.49", "acc_total": "126.49", "n_correct": "111.065", "wer_total": "126.49", "n_error": "15.425", "ppl": "1.74", "accuracy": "87.805", "wer": "12.195", "wps": "69.2", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "213800", "lr": "9.09251e-05", "gnorm": "2.241", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "606675"} [2024-07-13 01:21:29,315][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-13 01:21:38,532][train_inner][INFO] - {"epoch": 2, "update": 1.419, "loss": "0.789", "ntokens": "126.765", "acc_total": "126.765", "n_correct": "111.41", "wer_total": "126.765", "n_error": "15.345", "ppl": "1.73", "accuracy": "87.887", "wer": "12.105", "wps": "69", "ups": "0.54", "wpb": "126.8", "bsz": "8", "num_updates": "214000", "lr": "9.06532e-05", "gnorm": "2.313", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "607043"} [2024-07-13 01:27:43,659][train_inner][INFO] - {"epoch": 2, "update": 1.42, "loss": "0.805", "ntokens": "127.105", "acc_total": "127.105", "n_correct": "111.825", "wer_total": "127.105", "n_error": "15.28", "ppl": "1.75", "accuracy": "87.978", "wer": "12.022", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "214200", "lr": "9.0382e-05", "gnorm": "2.351", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "607408"} [2024-07-13 01:33:49,038][train_inner][INFO] - {"epoch": 2, "update": 1.422, "loss": "0.804", "ntokens": "126.85", "acc_total": "126.85", "n_correct": "111.405", "wer_total": "126.85", "n_error": "15.445", "ppl": "1.75", "accuracy": "87.824", "wer": "12.176", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "214400", "lr": "9.01116e-05", "gnorm": "2.313", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "607773"} [2024-07-13 01:39:54,574][train_inner][INFO] - {"epoch": 2, "update": 1.423, "loss": "0.812", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "111.675", "wer_total": "127.135", "n_error": "15.46", "ppl": "1.76", "accuracy": "87.84", "wer": "12.16", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "214600", "lr": "8.98421e-05", "gnorm": "2.339", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "608139"} [2024-07-13 01:45:59,904][train_inner][INFO] - {"epoch": 2, "update": 1.424, "loss": "0.791", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "111.07", "wer_total": "126.265", "n_error": "15.185", "ppl": "1.73", "accuracy": "87.966", "wer": "12.026", "wps": "69.1", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "214800", "lr": "8.95733e-05", "gnorm": "2.271", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "608504"} [2024-07-13 01:52:05,303][train_inner][INFO] - {"epoch": 2, "update": 1.426, "loss": "0.781", "ntokens": "126.24", "acc_total": "126.24", "n_correct": "111.39", "wer_total": "126.24", "n_error": "14.845", "ppl": "1.72", "accuracy": "88.237", "wer": "11.759", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "215000", "lr": "8.93054e-05", "gnorm": "2.373", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "608869"} [2024-07-13 01:52:05,304][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 02:33:58,084][valid][INFO] - {"epoch": 2, "valid_loss": "0.687", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1422", "valid_wer_total": "18.1585", "valid_n_error": "2.01609", "valid_ppl": "1.61", "valid_accuracy": "88.896", "valid_wer": "11.103", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "215000", "valid_best_accuracy": "88.925"} [2024-07-13 02:33:58,085][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 215000 updates [2024-07-13 02:33:58,085][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_215000.pt [2024-07-13 02:34:01,293][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_215000.pt [2024-07-13 02:34:03,464][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_215000.pt (epoch 2 @ 215000 updates, score 88.896) (writing took 5.379603252047673 seconds) [2024-07-13 02:40:08,717][train_inner][INFO] - {"epoch": 2, "update": 1.427, "loss": "0.75", "ntokens": "127.045", "acc_total": "127.045", "n_correct": "112.785", "wer_total": "127.045", "n_error": "14.255", "ppl": "1.68", "accuracy": "88.776", "wer": "11.22", "wps": "8.8", "ups": "0.07", "wpb": "127", "bsz": "8", "num_updates": "215200", "lr": "8.90383e-05", "gnorm": "2.466", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "611753"} [2024-07-13 02:46:13,879][train_inner][INFO] - {"epoch": 2, "update": 1.428, "loss": "0.778", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "111.79", "wer_total": "126.93", "n_error": "15.14", "ppl": "1.71", "accuracy": "88.072", "wer": "11.928", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "215400", "lr": "8.87719e-05", "gnorm": "2.295", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "612118"} [2024-07-13 02:52:17,944][train_inner][INFO] - {"epoch": 2, "update": 1.43, "loss": "0.777", "ntokens": "128.225", "acc_total": "128.225", "n_correct": "112.845", "wer_total": "128.225", "n_error": "15.38", "ppl": "1.71", "accuracy": "88.005", "wer": "11.995", "wps": "70.4", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "215600", "lr": "8.85064e-05", "gnorm": "2.214", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "612482"} [2024-07-13 02:58:21,707][train_inner][INFO] - {"epoch": 2, "update": 1.431, "loss": "0.749", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "112.17", "wer_total": "126.92", "n_error": "14.745", "ppl": "1.68", "accuracy": "88.379", "wer": "11.618", "wps": "69.8", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "215800", "lr": "8.82417e-05", "gnorm": "2.179", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "612846"} [2024-07-13 03:04:25,459][train_inner][INFO] - {"epoch": 2, "update": 1.432, "loss": "0.791", "ntokens": "127.505", "acc_total": "127.505", "n_correct": "112.07", "wer_total": "127.505", "n_error": "15.435", "ppl": "1.73", "accuracy": "87.895", "wer": "12.105", "wps": "70.1", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "216000", "lr": "8.79777e-05", "gnorm": "2.293", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "613210"} [2024-07-13 03:10:29,017][train_inner][INFO] - {"epoch": 2, "update": 1.434, "loss": "0.784", "ntokens": "127.81", "acc_total": "127.81", "n_correct": "112.195", "wer_total": "127.81", "n_error": "15.61", "ppl": "1.72", "accuracy": "87.783", "wer": "12.213", "wps": "70.3", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "216200", "lr": "8.77145e-05", "gnorm": "2.346", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "613573"} [2024-07-13 03:16:32,385][train_inner][INFO] - {"epoch": 2, "update": 1.435, "loss": "0.788", "ntokens": "126.99", "acc_total": "126.99", "n_correct": "111.43", "wer_total": "126.99", "n_error": "15.555", "ppl": "1.73", "accuracy": "87.747", "wer": "12.249", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "216400", "lr": "8.74522e-05", "gnorm": "2.265", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "613936"} [2024-07-13 03:22:35,918][train_inner][INFO] - {"epoch": 2, "update": 1.436, "loss": "0.777", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "111.28", "wer_total": "126.46", "n_error": "15.17", "ppl": "1.71", "accuracy": "87.996", "wer": "11.996", "wps": "69.6", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "216600", "lr": "8.71906e-05", "gnorm": "2.499", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "614300"} [2024-07-13 03:28:39,402][train_inner][INFO] - {"epoch": 2, "update": 1.438, "loss": "0.801", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "111.705", "wer_total": "127.155", "n_error": "15.435", "ppl": "1.74", "accuracy": "87.849", "wer": "12.139", "wps": "70", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "216800", "lr": "8.69298e-05", "gnorm": "2.211", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "614663"} [2024-07-13 03:34:42,859][train_inner][INFO] - {"epoch": 2, "update": 1.439, "loss": "0.79", "ntokens": "126.83", "acc_total": "126.83", "n_correct": "111.53", "wer_total": "126.83", "n_error": "15.3", "ppl": "1.73", "accuracy": "87.937", "wer": "12.063", "wps": "69.8", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "217000", "lr": "8.66697e-05", "gnorm": "2.228", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "615027"} [2024-07-13 03:40:46,225][train_inner][INFO] - {"epoch": 2, "update": 1.44, "loss": "0.745", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "111.86", "wer_total": "126.27", "n_error": "14.41", "ppl": "1.68", "accuracy": "88.588", "wer": "11.412", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "217200", "lr": "8.64105e-05", "gnorm": "2.116", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "615390"} [2024-07-13 03:46:49,605][train_inner][INFO] - {"epoch": 2, "update": 1.442, "loss": "0.813", "ntokens": "126.38", "acc_total": "126.38", "n_correct": "110.9", "wer_total": "126.38", "n_error": "15.48", "ppl": "1.76", "accuracy": "87.751", "wer": "12.249", "wps": "69.6", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "217400", "lr": "8.6152e-05", "gnorm": "2.209", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "615754"} [2024-07-13 03:49:51,295][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 04:31:27,850][valid][INFO] - {"epoch": 2, "valid_loss": "0.685", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.164", "valid_wer_total": "18.1585", "valid_n_error": "1.99441", "valid_ppl": "1.61", "valid_accuracy": "89.016", "valid_wer": "10.983", "valid_wps": "174.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "217500", "valid_best_accuracy": "89.016"} [2024-07-13 04:31:27,851][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 217500 updates [2024-07-13 04:31:27,851][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_217500.pt [2024-07-13 04:31:31,090][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_217500.pt [2024-07-13 04:31:35,283][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_217500.pt (epoch 2 @ 217500 updates, score 89.016) (writing took 7.432244149968028 seconds) [2024-07-13 04:34:36,625][train_inner][INFO] - {"epoch": 2, "update": 1.443, "loss": "0.787", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "111.26", "wer_total": "126.58", "n_error": "15.31", "ppl": "1.73", "accuracy": "87.897", "wer": "12.095", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "217600", "lr": "8.58943e-05", "gnorm": "2.32", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "618621"} [2024-07-13 04:40:39,887][train_inner][INFO] - {"epoch": 2, "update": 1.444, "loss": "0.751", "ntokens": "126.195", "acc_total": "126.195", "n_correct": "111.22", "wer_total": "126.195", "n_error": "14.97", "ppl": "1.68", "accuracy": "88.133", "wer": "11.863", "wps": "69.5", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "217800", "lr": "8.56374e-05", "gnorm": "2.229", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "618984"} [2024-07-13 04:46:43,282][train_inner][INFO] - {"epoch": 2, "update": 1.446, "loss": "0.805", "ntokens": "127.835", "acc_total": "127.835", "n_correct": "112.145", "wer_total": "127.835", "n_error": "15.69", "ppl": "1.75", "accuracy": "87.726", "wer": "12.274", "wps": "70.4", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "218000", "lr": "8.53812e-05", "gnorm": "2.406", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "619347"} [2024-07-13 04:50:51,926][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-13 04:52:48,302][train_inner][INFO] - {"epoch": 2, "update": 1.447, "loss": "0.762", "ntokens": "127.065", "acc_total": "127.065", "n_correct": "112.425", "wer_total": "127.065", "n_error": "14.64", "ppl": "1.7", "accuracy": "88.478", "wer": "11.522", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "218200", "lr": "8.51258e-05", "gnorm": "2.197", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "619712"} [2024-07-13 04:58:51,508][train_inner][INFO] - {"epoch": 2, "update": 1.448, "loss": "0.801", "ntokens": "125.955", "acc_total": "125.955", "n_correct": "110.82", "wer_total": "125.955", "n_error": "15.13", "ppl": "1.74", "accuracy": "87.984", "wer": "12.012", "wps": "69.4", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "218400", "lr": "8.48712e-05", "gnorm": "2.27", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "620076"} [2024-07-13 05:04:55,039][train_inner][INFO] - {"epoch": 2, "update": 1.45, "loss": "0.807", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "111.915", "wer_total": "127.49", "n_error": "15.575", "ppl": "1.75", "accuracy": "87.783", "wer": "12.217", "wps": "70.1", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "218600", "lr": "8.46173e-05", "gnorm": "2.234", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "620439"} [2024-07-13 05:10:58,256][train_inner][INFO] - {"epoch": 2, "update": 1.451, "loss": "0.778", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "111.865", "wer_total": "127.09", "n_error": "15.225", "ppl": "1.72", "accuracy": "88.02", "wer": "11.98", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "218800", "lr": "8.43642e-05", "gnorm": "2.221", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "620802"} [2024-07-13 05:17:01,765][train_inner][INFO] - {"epoch": 2, "update": 1.452, "loss": "0.804", "ntokens": "127.535", "acc_total": "127.535", "n_correct": "111.985", "wer_total": "127.535", "n_error": "15.55", "ppl": "1.75", "accuracy": "87.807", "wer": "12.193", "wps": "70.2", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "219000", "lr": "8.41118e-05", "gnorm": "2.315", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "621166"} [2024-07-13 05:23:05,264][train_inner][INFO] - {"epoch": 2, "update": 1.454, "loss": "0.768", "ntokens": "127.56", "acc_total": "127.56", "n_correct": "112.265", "wer_total": "127.56", "n_error": "15.295", "ppl": "1.7", "accuracy": "88.01", "wer": "11.99", "wps": "70.2", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "219200", "lr": "8.38602e-05", "gnorm": "2.206", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "621529"} [2024-07-13 05:29:08,747][train_inner][INFO] - {"epoch": 2, "update": 1.455, "loss": "0.762", "ntokens": "126.375", "acc_total": "126.375", "n_correct": "111.555", "wer_total": "126.375", "n_error": "14.82", "ppl": "1.7", "accuracy": "88.273", "wer": "11.727", "wps": "69.5", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "219400", "lr": "8.36094e-05", "gnorm": "2.331", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "621893"} [2024-07-13 05:29:54,012][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-13 05:35:13,651][train_inner][INFO] - {"epoch": 2, "update": 1.456, "loss": "0.792", "ntokens": "126.57", "acc_total": "126.57", "n_correct": "111.73", "wer_total": "126.57", "n_error": "14.84", "ppl": "1.73", "accuracy": "88.275", "wer": "11.725", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "219600", "lr": "8.33593e-05", "gnorm": "2.213", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "622258"} [2024-07-13 05:41:16,667][train_inner][INFO] - {"epoch": 2, "update": 1.458, "loss": "0.765", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "112.125", "wer_total": "126.975", "n_error": "14.85", "ppl": "1.7", "accuracy": "88.305", "wer": "11.695", "wps": "70", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "219800", "lr": "8.311e-05", "gnorm": "2.21", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "622621"} [2024-07-13 05:47:19,990][train_inner][INFO] - {"epoch": 2, "update": 1.459, "loss": "0.788", "ntokens": "126.565", "acc_total": "126.565", "n_correct": "111.46", "wer_total": "126.565", "n_error": "15.105", "ppl": "1.73", "accuracy": "88.065", "wer": "11.935", "wps": "69.7", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "220000", "lr": "8.28614e-05", "gnorm": "2.176", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "622984"} [2024-07-13 05:47:19,990][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 06:29:00,474][valid][INFO] - {"epoch": 2, "valid_loss": "0.682", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1622", "valid_wer_total": "18.1585", "valid_n_error": "1.99596", "valid_ppl": "1.6", "valid_accuracy": "89.007", "valid_wer": "10.992", "valid_wps": "174.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "220000", "valid_best_accuracy": "89.016"} [2024-07-13 06:29:00,475][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 220000 updates [2024-07-13 06:29:00,475][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_220000.pt [2024-07-13 06:29:03,676][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_220000.pt [2024-07-13 06:29:05,847][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_220000.pt (epoch 2 @ 220000 updates, score 89.007) (writing took 5.372381938039325 seconds) [2024-07-13 06:35:09,731][train_inner][INFO] - {"epoch": 2, "update": 1.46, "loss": "0.753", "ntokens": "126.61", "acc_total": "126.61", "n_correct": "111.715", "wer_total": "126.61", "n_error": "14.895", "ppl": "1.69", "accuracy": "88.236", "wer": "11.764", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "220200", "lr": "8.26135e-05", "gnorm": "2.14", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "625854"} [2024-07-13 06:41:13,803][train_inner][INFO] - {"epoch": 2, "update": 1.462, "loss": "0.795", "ntokens": "127.105", "acc_total": "127.105", "n_correct": "111.985", "wer_total": "127.105", "n_error": "15.12", "ppl": "1.74", "accuracy": "88.104", "wer": "11.896", "wps": "69.8", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "220400", "lr": "8.23664e-05", "gnorm": "2.273", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "626218"} [2024-07-13 06:47:17,602][train_inner][INFO] - {"epoch": 2, "update": 1.463, "loss": "0.751", "ntokens": "127.305", "acc_total": "127.305", "n_correct": "112.49", "wer_total": "127.305", "n_error": "14.81", "ppl": "1.68", "accuracy": "88.363", "wer": "11.633", "wps": "70", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "220600", "lr": "8.212e-05", "gnorm": "2.241", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "626582"} [2024-07-13 06:53:21,141][train_inner][INFO] - {"epoch": 2, "update": 1.464, "loss": "0.802", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "111.01", "wer_total": "126.855", "n_error": "15.84", "ppl": "1.74", "accuracy": "87.509", "wer": "12.487", "wps": "69.8", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "220800", "lr": "8.18744e-05", "gnorm": "2.301", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "626945"} [2024-07-13 06:59:24,688][train_inner][INFO] - {"epoch": 2, "update": 1.466, "loss": "0.776", "ntokens": "126.455", "acc_total": "126.455", "n_correct": "111.48", "wer_total": "126.455", "n_error": "14.97", "ppl": "1.71", "accuracy": "88.158", "wer": "11.838", "wps": "69.6", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "221000", "lr": "8.16294e-05", "gnorm": "2.265", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "627309"} [2024-07-13 07:05:28,057][train_inner][INFO] - {"epoch": 2, "update": 1.467, "loss": "0.8", "ntokens": "127.045", "acc_total": "127.045", "n_correct": "111.665", "wer_total": "127.045", "n_error": "15.38", "ppl": "1.74", "accuracy": "87.894", "wer": "12.106", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "221200", "lr": "8.13853e-05", "gnorm": "2.252", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "627672"} [2024-07-13 07:11:31,126][train_inner][INFO] - {"epoch": 2, "update": 1.468, "loss": "0.775", "ntokens": "127.21", "acc_total": "127.21", "n_correct": "112.125", "wer_total": "127.21", "n_error": "15.085", "ppl": "1.71", "accuracy": "88.142", "wer": "11.858", "wps": "70.1", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "221400", "lr": "8.11418e-05", "gnorm": "2.221", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "628035"} [2024-07-13 07:17:34,380][train_inner][INFO] - {"epoch": 2, "update": 1.47, "loss": "0.753", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "112.46", "wer_total": "127.11", "n_error": "14.65", "ppl": "1.69", "accuracy": "88.475", "wer": "11.525", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "221600", "lr": "8.08991e-05", "gnorm": "2.142", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "628398"} [2024-07-13 07:23:37,984][train_inner][INFO] - {"epoch": 2, "update": 1.471, "loss": "0.79", "ntokens": "127.065", "acc_total": "127.065", "n_correct": "112.115", "wer_total": "127.065", "n_error": "14.94", "ppl": "1.73", "accuracy": "88.234", "wer": "11.758", "wps": "69.9", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "221800", "lr": "8.06571e-05", "gnorm": "2.264", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "628762"} [2024-07-13 07:29:41,433][train_inner][INFO] - {"epoch": 2, "update": 1.472, "loss": "0.79", "ntokens": "127.575", "acc_total": "127.575", "n_correct": "112.555", "wer_total": "127.575", "n_error": "15.015", "ppl": "1.73", "accuracy": "88.227", "wer": "11.77", "wps": "70.2", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "222000", "lr": "8.04159e-05", "gnorm": "2.193", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "629125"} [2024-07-13 07:35:44,710][train_inner][INFO] - {"epoch": 2, "update": 1.474, "loss": "0.787", "ntokens": "127.4", "acc_total": "127.4", "n_correct": "112.175", "wer_total": "127.4", "n_error": "15.225", "ppl": "1.73", "accuracy": "88.049", "wer": "11.951", "wps": "70.1", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "222200", "lr": "8.01753e-05", "gnorm": "2.139", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "629489"} [2024-07-13 07:41:47,973][train_inner][INFO] - {"epoch": 2, "update": 1.475, "loss": "0.761", "ntokens": "126.965", "acc_total": "126.965", "n_correct": "112.045", "wer_total": "126.965", "n_error": "14.92", "ppl": "1.69", "accuracy": "88.249", "wer": "11.751", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "222400", "lr": "7.99355e-05", "gnorm": "2.183", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "629852"} [2024-07-13 07:44:49,637][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 08:26:30,654][valid][INFO] - {"epoch": 2, "valid_loss": "0.682", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1759", "valid_wer_total": "18.1585", "valid_n_error": "1.98233", "valid_ppl": "1.6", "valid_accuracy": "89.082", "valid_wer": "10.917", "valid_wps": "174.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "222500", "valid_best_accuracy": "89.082"} [2024-07-13 08:26:30,655][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 222500 updates [2024-07-13 08:26:30,655][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_222500.pt [2024-07-13 08:26:33,901][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_222500.pt [2024-07-13 08:26:38,059][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_222500.pt (epoch 2 @ 222500 updates, score 89.082) (writing took 7.403934038011357 seconds) [2024-07-13 08:29:40,233][train_inner][INFO] - {"epoch": 2, "update": 1.476, "loss": "0.769", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "112.09", "wer_total": "127.09", "n_error": "15", "ppl": "1.7", "accuracy": "88.197", "wer": "11.803", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "222600", "lr": "7.96964e-05", "gnorm": "2.313", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "632724"} [2024-07-13 08:35:44,671][train_inner][INFO] - {"epoch": 2, "update": 1.478, "loss": "0.745", "ntokens": "127.37", "acc_total": "127.37", "n_correct": "112.975", "wer_total": "127.37", "n_error": "14.39", "ppl": "1.68", "accuracy": "88.698", "wer": "11.298", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "222800", "lr": "7.9458e-05", "gnorm": "2.237", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "633089"} [2024-07-13 08:41:48,922][train_inner][INFO] - {"epoch": 2, "update": 1.479, "loss": "0.821", "ntokens": "125.535", "acc_total": "125.535", "n_correct": "110.335", "wer_total": "125.535", "n_error": "15.2", "ppl": "1.77", "accuracy": "87.892", "wer": "12.108", "wps": "68.9", "ups": "0.55", "wpb": "125.5", "bsz": "8", "num_updates": "223000", "lr": "7.92203e-05", "gnorm": "2.359", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "633453"} [2024-07-13 08:47:53,307][train_inner][INFO] - {"epoch": 2, "update": 1.48, "loss": "0.747", "ntokens": "126.825", "acc_total": "126.825", "n_correct": "112.05", "wer_total": "126.825", "n_error": "14.775", "ppl": "1.68", "accuracy": "88.35", "wer": "11.65", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "223200", "lr": "7.89833e-05", "gnorm": "2.204", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "633817"} [2024-07-13 08:53:57,634][train_inner][INFO] - {"epoch": 2, "update": 1.481, "loss": "0.808", "ntokens": "125.675", "acc_total": "125.675", "n_correct": "110.33", "wer_total": "125.675", "n_error": "15.34", "ppl": "1.75", "accuracy": "87.79", "wer": "12.206", "wps": "69", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "223400", "lr": "7.87471e-05", "gnorm": "2.296", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "634182"} [2024-07-13 09:00:02,003][train_inner][INFO] - {"epoch": 2, "update": 1.483, "loss": "0.783", "ntokens": "125.43", "acc_total": "125.43", "n_correct": "110.34", "wer_total": "125.43", "n_error": "15.09", "ppl": "1.72", "accuracy": "87.969", "wer": "12.031", "wps": "68.8", "ups": "0.55", "wpb": "125.4", "bsz": "8", "num_updates": "223600", "lr": "7.85115e-05", "gnorm": "2.276", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "634546"} [2024-07-13 09:06:06,574][train_inner][INFO] - {"epoch": 2, "update": 1.484, "loss": "0.748", "ntokens": "127.005", "acc_total": "127.005", "n_correct": "112.375", "wer_total": "127.005", "n_error": "14.625", "ppl": "1.68", "accuracy": "88.481", "wer": "11.515", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "223800", "lr": "7.82767e-05", "gnorm": "2.148", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "634911"} [2024-07-13 09:12:00,163][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-13 09:12:12,907][train_inner][INFO] - {"epoch": 2, "update": 1.485, "loss": "0.772", "ntokens": "127.15", "acc_total": "127.15", "n_correct": "112.35", "wer_total": "127.15", "n_error": "14.8", "ppl": "1.71", "accuracy": "88.36", "wer": "11.64", "wps": "69.4", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "224000", "lr": "7.80425e-05", "gnorm": "2.307", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "635277"} [2024-07-13 09:18:17,607][train_inner][INFO] - {"epoch": 2, "update": 1.487, "loss": "0.746", "ntokens": "127.16", "acc_total": "127.16", "n_correct": "112.465", "wer_total": "127.16", "n_error": "14.695", "ppl": "1.68", "accuracy": "88.444", "wer": "11.556", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "224200", "lr": "7.78091e-05", "gnorm": "2.21", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "635642"} [2024-07-13 09:24:22,360][train_inner][INFO] - {"epoch": 2, "update": 1.488, "loss": "0.757", "ntokens": "127.435", "acc_total": "127.435", "n_correct": "112.32", "wer_total": "127.435", "n_error": "15.11", "ppl": "1.69", "accuracy": "88.139", "wer": "11.857", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "224400", "lr": "7.75764e-05", "gnorm": "2.239", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "636006"} [2024-07-13 09:30:26,895][train_inner][INFO] - {"epoch": 2, "update": 1.489, "loss": "0.755", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "111.87", "wer_total": "126.905", "n_error": "15.035", "ppl": "1.69", "accuracy": "88.153", "wer": "11.847", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "224600", "lr": "7.73443e-05", "gnorm": "2.156", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "636371"} [2024-07-13 09:36:31,590][train_inner][INFO] - {"epoch": 2, "update": 1.491, "loss": "0.777", "ntokens": "126.14", "acc_total": "126.14", "n_correct": "111.07", "wer_total": "126.14", "n_error": "15.065", "ppl": "1.71", "accuracy": "88.053", "wer": "11.943", "wps": "69.2", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "224800", "lr": "7.71129e-05", "gnorm": "2.267", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "636736"} [2024-07-13 09:42:36,558][train_inner][INFO] - {"epoch": 2, "update": 1.492, "loss": "0.765", "ntokens": "127.48", "acc_total": "127.48", "n_correct": "112.665", "wer_total": "127.48", "n_error": "14.805", "ppl": "1.7", "accuracy": "88.379", "wer": "11.614", "wps": "69.9", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "225000", "lr": "7.68823e-05", "gnorm": "2.173", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "637101"} [2024-07-13 09:42:36,558][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 10:24:27,315][valid][INFO] - {"epoch": 2, "valid_loss": "0.675", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1925", "valid_wer_total": "18.1585", "valid_n_error": "1.96569", "valid_ppl": "1.6", "valid_accuracy": "89.173", "valid_wer": "10.825", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "225000", "valid_best_accuracy": "89.173"} [2024-07-13 10:24:27,316][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 225000 updates [2024-07-13 10:24:27,316][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_225000.pt [2024-07-13 10:24:30,529][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_225000.pt [2024-07-13 10:24:34,822][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_225000.pt (epoch 2 @ 225000 updates, score 89.173) (writing took 7.5058728531003 seconds) [2024-07-13 10:30:39,346][train_inner][INFO] - {"epoch": 2, "update": 1.493, "loss": "0.775", "ntokens": "125.8", "acc_total": "125.8", "n_correct": "111.08", "wer_total": "125.8", "n_error": "14.72", "ppl": "1.71", "accuracy": "88.299", "wer": "11.701", "wps": "8.7", "ups": "0.07", "wpb": "125.8", "bsz": "8", "num_updates": "225200", "lr": "7.66523e-05", "gnorm": "2.248", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "639983"} [2024-07-13 10:36:44,390][train_inner][INFO] - {"epoch": 2, "update": 1.495, "loss": "0.763", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "112.72", "wer_total": "127.465", "n_error": "14.745", "ppl": "1.7", "accuracy": "88.432", "wer": "11.568", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "225400", "lr": "7.6423e-05", "gnorm": "2.232", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "640348"} [2024-07-13 10:42:49,362][train_inner][INFO] - {"epoch": 2, "update": 1.496, "loss": "0.786", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "112.355", "wer_total": "127.49", "n_error": "15.13", "ppl": "1.72", "accuracy": "88.128", "wer": "11.868", "wps": "69.9", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "225600", "lr": "7.61944e-05", "gnorm": "2.17", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "640713"} [2024-07-13 10:48:53,902][train_inner][INFO] - {"epoch": 2, "update": 1.497, "loss": "0.771", "ntokens": "126.525", "acc_total": "126.525", "n_correct": "111.83", "wer_total": "126.525", "n_error": "14.695", "ppl": "1.71", "accuracy": "88.386", "wer": "11.614", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "225800", "lr": "7.59665e-05", "gnorm": "2.121", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "641078"} [2024-07-13 10:54:58,798][train_inner][INFO] - {"epoch": 2, "update": 1.499, "loss": "0.773", "ntokens": "126.395", "acc_total": "126.395", "n_correct": "111.195", "wer_total": "126.395", "n_error": "15.19", "ppl": "1.71", "accuracy": "87.974", "wer": "12.018", "wps": "69.3", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "226000", "lr": "7.57393e-05", "gnorm": "2.195", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "641443"} [2024-07-13 11:01:03,708][train_inner][INFO] - {"epoch": 2, "update": 1.5, "loss": "0.754", "ntokens": "127.295", "acc_total": "127.295", "n_correct": "112.72", "wer_total": "127.295", "n_error": "14.575", "ppl": "1.69", "accuracy": "88.55", "wer": "11.45", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "226200", "lr": "7.55127e-05", "gnorm": "2.305", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "641808"} [2024-07-13 11:07:08,358][train_inner][INFO] - {"epoch": 2, "update": 1.501, "loss": "0.782", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "112.225", "wer_total": "127.145", "n_error": "14.92", "ppl": "1.72", "accuracy": "88.265", "wer": "11.735", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "226400", "lr": "7.52868e-05", "gnorm": "2.108", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "642172"} [2024-07-13 11:13:12,960][train_inner][INFO] - {"epoch": 2, "update": 1.503, "loss": "0.8", "ntokens": "127.415", "acc_total": "127.415", "n_correct": "112.055", "wer_total": "127.415", "n_error": "15.36", "ppl": "1.74", "accuracy": "87.945", "wer": "12.055", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "226600", "lr": "7.50616e-05", "gnorm": "2.184", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "642537"} [2024-07-13 11:19:17,777][train_inner][INFO] - {"epoch": 2, "update": 1.504, "loss": "0.773", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "112.665", "wer_total": "127.64", "n_error": "14.97", "ppl": "1.71", "accuracy": "88.268", "wer": "11.728", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "226800", "lr": "7.48371e-05", "gnorm": "2.142", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "642902"} [2024-07-13 11:25:22,677][train_inner][INFO] - {"epoch": 2, "update": 1.505, "loss": "0.805", "ntokens": "127.195", "acc_total": "127.195", "n_correct": "111.59", "wer_total": "127.195", "n_error": "15.6", "ppl": "1.75", "accuracy": "87.731", "wer": "12.265", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "227000", "lr": "7.46132e-05", "gnorm": "2.304", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "643267"} [2024-07-13 11:31:00,087][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-13 11:31:29,375][train_inner][INFO] - {"epoch": 2, "update": 1.507, "loss": "0.762", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "112.065", "wer_total": "126.945", "n_error": "14.875", "ppl": "1.7", "accuracy": "88.278", "wer": "11.718", "wps": "69.2", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "227200", "lr": "7.43901e-05", "gnorm": "2.225", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "643633"} [2024-07-13 11:37:34,031][train_inner][INFO] - {"epoch": 2, "update": 1.508, "loss": "0.781", "ntokens": "126.465", "acc_total": "126.465", "n_correct": "111.48", "wer_total": "126.465", "n_error": "14.985", "ppl": "1.72", "accuracy": "88.151", "wer": "11.849", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "227400", "lr": "7.41675e-05", "gnorm": "2.176", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "643998"} [2024-07-13 11:40:37,174][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 12:22:23,639][valid][INFO] - {"epoch": 2, "valid_loss": "0.672", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1935", "valid_wer_total": "18.1585", "valid_n_error": "1.96478", "valid_ppl": "1.59", "valid_accuracy": "89.178", "valid_wer": "10.82", "valid_wps": "173.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "227500", "valid_best_accuracy": "89.178"} [2024-07-13 12:22:23,640][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 227500 updates [2024-07-13 12:22:23,640][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_227500.pt [2024-07-13 12:22:26,887][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_227500.pt [2024-07-13 12:22:31,160][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_227500.pt (epoch 2 @ 227500 updates, score 89.178) (writing took 7.520063709001988 seconds) [2024-07-13 12:25:33,197][train_inner][INFO] - {"epoch": 2, "update": 1.509, "loss": "0.78", "ntokens": "126.575", "acc_total": "126.575", "n_correct": "111.68", "wer_total": "126.575", "n_error": "14.895", "ppl": "1.72", "accuracy": "88.232", "wer": "11.768", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "227600", "lr": "7.39457e-05", "gnorm": "2.33", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "646877"} [2024-07-13 12:31:37,877][train_inner][INFO] - {"epoch": 2, "update": 1.511, "loss": "0.732", "ntokens": "126.015", "acc_total": "126.015", "n_correct": "111.56", "wer_total": "126.015", "n_error": "14.445", "ppl": "1.66", "accuracy": "88.529", "wer": "11.463", "wps": "69.1", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "227800", "lr": "7.37245e-05", "gnorm": "2.266", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "647242"} [2024-07-13 12:37:42,356][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "0.756", "ntokens": "127.275", "acc_total": "127.275", "n_correct": "112.47", "wer_total": "127.275", "n_error": "14.8", "ppl": "1.69", "accuracy": "88.368", "wer": "11.628", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "228000", "lr": "7.3504e-05", "gnorm": "2.203", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "647606"} [2024-07-13 12:43:47,214][train_inner][INFO] - {"epoch": 2, "update": 1.513, "loss": "0.771", "ntokens": "128.32", "acc_total": "128.32", "n_correct": "113.035", "wer_total": "128.32", "n_error": "15.285", "ppl": "1.71", "accuracy": "88.088", "wer": "11.912", "wps": "70.3", "ups": "0.55", "wpb": "128.3", "bsz": "8", "num_updates": "228200", "lr": "7.32841e-05", "gnorm": "2.079", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "647971"} [2024-07-13 12:49:52,160][train_inner][INFO] - {"epoch": 2, "update": 1.515, "loss": "0.745", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "112.285", "wer_total": "126.6", "n_error": "14.315", "ppl": "1.68", "accuracy": "88.693", "wer": "11.307", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "228400", "lr": "7.30649e-05", "gnorm": "2.113", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "648336"} [2024-07-13 12:55:57,100][train_inner][INFO] - {"epoch": 2, "update": 1.516, "loss": "0.778", "ntokens": "128.875", "acc_total": "128.875", "n_correct": "113.7", "wer_total": "128.875", "n_error": "15.17", "ppl": "1.71", "accuracy": "88.225", "wer": "11.771", "wps": "70.6", "ups": "0.55", "wpb": "128.9", "bsz": "8", "num_updates": "228600", "lr": "7.28463e-05", "gnorm": "2.203", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "648701"} [2024-07-13 13:02:01,850][train_inner][INFO] - {"epoch": 2, "update": 1.517, "loss": "0.766", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "112.4", "wer_total": "127.145", "n_error": "14.735", "ppl": "1.7", "accuracy": "88.403", "wer": "11.589", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "228800", "lr": "7.26284e-05", "gnorm": "2.16", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "649066"} [2024-07-13 13:08:06,684][train_inner][INFO] - {"epoch": 2, "update": 1.519, "loss": "0.799", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "111.425", "wer_total": "126.665", "n_error": "15.24", "ppl": "1.74", "accuracy": "87.968", "wer": "12.032", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "229000", "lr": "7.24112e-05", "gnorm": "2.366", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "649431"} [2024-07-13 13:14:11,520][train_inner][INFO] - {"epoch": 2, "update": 1.52, "loss": "0.77", "ntokens": "126.65", "acc_total": "126.65", "n_correct": "112.055", "wer_total": "126.65", "n_error": "14.595", "ppl": "1.71", "accuracy": "88.476", "wer": "11.524", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "229200", "lr": "7.21946e-05", "gnorm": "2.235", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "649796"} [2024-07-13 13:20:16,576][train_inner][INFO] - {"epoch": 2, "update": 1.521, "loss": "0.795", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "112.245", "wer_total": "127.615", "n_error": "15.365", "ppl": "1.74", "accuracy": "87.956", "wer": "12.04", "wps": "69.9", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "229400", "lr": "7.19786e-05", "gnorm": "2.24", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "650161"} [2024-07-13 13:26:21,451][train_inner][INFO] - {"epoch": 2, "update": 1.523, "loss": "0.764", "ntokens": "127.25", "acc_total": "127.25", "n_correct": "112.595", "wer_total": "127.25", "n_error": "14.655", "ppl": "1.7", "accuracy": "88.483", "wer": "11.517", "wps": "69.8", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "229600", "lr": "7.17633e-05", "gnorm": "2.144", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "650526"} [2024-07-13 13:31:51,768][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-13 13:32:28,181][train_inner][INFO] - {"epoch": 2, "update": 1.524, "loss": "0.73", "ntokens": "126.95", "acc_total": "126.95", "n_correct": "112.475", "wer_total": "126.95", "n_error": "14.47", "ppl": "1.66", "accuracy": "88.598", "wer": "11.398", "wps": "69.2", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "229800", "lr": "7.15487e-05", "gnorm": "2.212", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "650892"} [2024-07-13 13:38:32,963][train_inner][INFO] - {"epoch": 2, "update": 1.525, "loss": "0.779", "ntokens": "126.85", "acc_total": "126.85", "n_correct": "111.725", "wer_total": "126.85", "n_error": "15.125", "ppl": "1.72", "accuracy": "88.076", "wer": "11.924", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "230000", "lr": "7.13346e-05", "gnorm": "2.279", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "651257"} [2024-07-13 13:38:32,963][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 14:20:22,641][valid][INFO] - {"epoch": 2, "valid_loss": "0.669", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.1878", "valid_wer_total": "18.1585", "valid_n_error": "1.9702", "valid_ppl": "1.59", "valid_accuracy": "89.147", "valid_wer": "10.85", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "230000", "valid_best_accuracy": "89.178"} [2024-07-13 14:20:22,642][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 230000 updates [2024-07-13 14:20:22,642][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_230000.pt [2024-07-13 14:20:25,852][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_230000.pt [2024-07-13 14:20:27,961][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_230000.pt (epoch 2 @ 230000 updates, score 89.147) (writing took 5.31990855501499 seconds) [2024-07-13 14:26:32,874][train_inner][INFO] - {"epoch": 2, "update": 1.527, "loss": "0.743", "ntokens": "127.48", "acc_total": "127.48", "n_correct": "112.785", "wer_total": "127.48", "n_error": "14.695", "ppl": "1.67", "accuracy": "88.473", "wer": "11.527", "wps": "8.9", "ups": "0.07", "wpb": "127.5", "bsz": "8", "num_updates": "230200", "lr": "7.11213e-05", "gnorm": "2.206", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "654137"} [2024-07-13 14:32:37,745][train_inner][INFO] - {"epoch": 2, "update": 1.528, "loss": "0.77", "ntokens": "126.445", "acc_total": "126.445", "n_correct": "111.865", "wer_total": "126.445", "n_error": "14.58", "ppl": "1.71", "accuracy": "88.469", "wer": "11.531", "wps": "69.3", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "230400", "lr": "7.09085e-05", "gnorm": "2.166", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "654502"} [2024-07-13 14:38:42,403][train_inner][INFO] - {"epoch": 2, "update": 1.529, "loss": "0.776", "ntokens": "126.32", "acc_total": "126.32", "n_correct": "111.26", "wer_total": "126.32", "n_error": "15.06", "ppl": "1.71", "accuracy": "88.078", "wer": "11.922", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "230600", "lr": "7.06964e-05", "gnorm": "2.174", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "654866"} [2024-07-13 14:44:47,121][train_inner][INFO] - {"epoch": 2, "update": 1.531, "loss": "0.763", "ntokens": "126.98", "acc_total": "126.98", "n_correct": "112.15", "wer_total": "126.98", "n_error": "14.83", "ppl": "1.7", "accuracy": "88.321", "wer": "11.679", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "230800", "lr": "7.04849e-05", "gnorm": "2.324", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "655231"} [2024-07-13 14:50:51,927][train_inner][INFO] - {"epoch": 2, "update": 1.532, "loss": "0.748", "ntokens": "126.135", "acc_total": "126.135", "n_correct": "111.38", "wer_total": "126.135", "n_error": "14.755", "ppl": "1.68", "accuracy": "88.302", "wer": "11.698", "wps": "69.2", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "231000", "lr": "7.02741e-05", "gnorm": "2.257", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "655596"} [2024-07-13 14:56:56,957][train_inner][INFO] - {"epoch": 2, "update": 1.533, "loss": "0.773", "ntokens": "127.4", "acc_total": "127.4", "n_correct": "112.225", "wer_total": "127.4", "n_error": "15.175", "ppl": "1.71", "accuracy": "88.089", "wer": "11.911", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "231200", "lr": "7.00639e-05", "gnorm": "2.124", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "655961"} [2024-07-13 15:03:01,797][train_inner][INFO] - {"epoch": 2, "update": 1.535, "loss": "0.745", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "112.22", "wer_total": "126.935", "n_error": "14.715", "ppl": "1.68", "accuracy": "88.407", "wer": "11.593", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "231400", "lr": "6.98543e-05", "gnorm": "2.2", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "656326"} [2024-07-13 15:09:06,764][train_inner][INFO] - {"epoch": 2, "update": 1.536, "loss": "0.75", "ntokens": "126.79", "acc_total": "126.79", "n_correct": "112.01", "wer_total": "126.79", "n_error": "14.775", "ppl": "1.68", "accuracy": "88.343", "wer": "11.653", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "231600", "lr": "6.96454e-05", "gnorm": "2.223", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "656691"} [2024-07-13 15:15:11,803][train_inner][INFO] - {"epoch": 2, "update": 1.537, "loss": "0.741", "ntokens": "127.355", "acc_total": "127.355", "n_correct": "112.685", "wer_total": "127.355", "n_error": "14.67", "ppl": "1.67", "accuracy": "88.481", "wer": "11.519", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "231800", "lr": "6.9437e-05", "gnorm": "2.171", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "657056"} [2024-07-13 15:19:52,814][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-13 15:21:18,611][train_inner][INFO] - {"epoch": 2, "update": 1.539, "loss": "0.734", "ntokens": "127.5", "acc_total": "127.5", "n_correct": "113.01", "wer_total": "127.5", "n_error": "14.485", "ppl": "1.66", "accuracy": "88.635", "wer": "11.361", "wps": "69.5", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "232000", "lr": "6.92293e-05", "gnorm": "2.155", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "657423"} [2024-07-13 15:27:23,325][train_inner][INFO] - {"epoch": 2, "update": 1.54, "loss": "0.76", "ntokens": "125.89", "acc_total": "125.89", "n_correct": "111.08", "wer_total": "125.89", "n_error": "14.81", "ppl": "1.69", "accuracy": "88.236", "wer": "11.764", "wps": "69", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "232200", "lr": "6.90223e-05", "gnorm": "2.396", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "657787"} [2024-07-13 15:33:28,033][train_inner][INFO] - {"epoch": 2, "update": 1.541, "loss": "0.775", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "112.085", "wer_total": "127.185", "n_error": "15.095", "ppl": "1.71", "accuracy": "88.128", "wer": "11.869", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "232400", "lr": "6.88158e-05", "gnorm": "2.3", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "658152"} [2024-07-13 15:36:30,515][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 16:18:20,642][valid][INFO] - {"epoch": 2, "valid_loss": "0.66", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2193", "valid_wer_total": "18.1585", "valid_n_error": "1.93893", "valid_ppl": "1.58", "valid_accuracy": "89.321", "valid_wer": "10.678", "valid_wps": "173.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "232500", "valid_best_accuracy": "89.321"} [2024-07-13 16:18:20,642][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 232500 updates [2024-07-13 16:18:20,643][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_232500.pt [2024-07-13 16:18:23,874][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_232500.pt [2024-07-13 16:18:29,266][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_232500.pt (epoch 2 @ 232500 updates, score 89.321) (writing took 8.623056392068975 seconds) [2024-07-13 16:21:31,472][train_inner][INFO] - {"epoch": 2, "update": 1.542, "loss": "0.761", "ntokens": "127.555", "acc_total": "127.555", "n_correct": "112.805", "wer_total": "127.555", "n_error": "14.75", "ppl": "1.69", "accuracy": "88.436", "wer": "11.564", "wps": "8.8", "ups": "0.07", "wpb": "127.6", "bsz": "8", "num_updates": "232600", "lr": "6.861e-05", "gnorm": "2.304", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "661036"} [2024-07-13 16:27:36,508][train_inner][INFO] - {"epoch": 2, "update": 1.544, "loss": "0.797", "ntokens": "127.14", "acc_total": "127.14", "n_correct": "111.665", "wer_total": "127.14", "n_error": "15.475", "ppl": "1.74", "accuracy": "87.828", "wer": "12.172", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "232800", "lr": "6.84047e-05", "gnorm": "2.329", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "661401"} [2024-07-13 16:33:41,408][train_inner][INFO] - {"epoch": 2, "update": 1.545, "loss": "0.747", "ntokens": "126.695", "acc_total": "126.695", "n_correct": "111.93", "wer_total": "126.695", "n_error": "14.765", "ppl": "1.68", "accuracy": "88.346", "wer": "11.654", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "233000", "lr": "6.82001e-05", "gnorm": "2.186", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "661765"} [2024-07-13 16:39:46,418][train_inner][INFO] - {"epoch": 2, "update": 1.546, "loss": "0.776", "ntokens": "125.775", "acc_total": "125.775", "n_correct": "110.85", "wer_total": "125.775", "n_error": "14.92", "ppl": "1.71", "accuracy": "88.134", "wer": "11.862", "wps": "68.9", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "233200", "lr": "6.79961e-05", "gnorm": "2.142", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "662130"} [2024-07-13 16:45:51,337][train_inner][INFO] - {"epoch": 2, "update": 1.548, "loss": "0.738", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "112.37", "wer_total": "126.725", "n_error": "14.345", "ppl": "1.67", "accuracy": "88.672", "wer": "11.32", "wps": "69.5", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "233400", "lr": "6.77927e-05", "gnorm": "2.154", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "662495"} [2024-07-13 16:47:04,284][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-13 16:51:58,255][train_inner][INFO] - {"epoch": 2, "update": 1.549, "loss": "0.773", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "112.66", "wer_total": "127.615", "n_error": "14.955", "ppl": "1.71", "accuracy": "88.281", "wer": "11.719", "wps": "69.6", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "233600", "lr": "6.75899e-05", "gnorm": "2.204", "loss_scale": "512", "train_wall": "366", "gb_free": "6.5", "wall": "662862"} [2024-07-13 16:58:03,227][train_inner][INFO] - {"epoch": 2, "update": 1.55, "loss": "0.763", "ntokens": "126.895", "acc_total": "126.895", "n_correct": "112.31", "wer_total": "126.895", "n_error": "14.585", "ppl": "1.7", "accuracy": "88.506", "wer": "11.494", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "233800", "lr": "6.73877e-05", "gnorm": "2.163", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "663227"} [2024-07-13 17:04:08,045][train_inner][INFO] - {"epoch": 2, "update": 1.552, "loss": "0.768", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "111.765", "wer_total": "127.06", "n_error": "15.295", "ppl": "1.7", "accuracy": "87.962", "wer": "12.038", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "234000", "lr": "6.71862e-05", "gnorm": "2.186", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "663592"} [2024-07-13 17:10:12,965][train_inner][INFO] - {"epoch": 2, "update": 1.553, "loss": "0.761", "ntokens": "126.895", "acc_total": "126.895", "n_correct": "112", "wer_total": "126.895", "n_error": "14.895", "ppl": "1.69", "accuracy": "88.262", "wer": "11.738", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "234200", "lr": "6.69852e-05", "gnorm": "2.156", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "663957"} [2024-07-13 17:16:17,653][train_inner][INFO] - {"epoch": 2, "update": 1.554, "loss": "0.768", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "111.595", "wer_total": "126.58", "n_error": "14.975", "ppl": "1.7", "accuracy": "88.162", "wer": "11.83", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "234400", "lr": "6.67848e-05", "gnorm": "2.283", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "664322"} [2024-07-13 17:22:22,669][train_inner][INFO] - {"epoch": 2, "update": 1.556, "loss": "0.787", "ntokens": "127.475", "acc_total": "127.475", "n_correct": "112.33", "wer_total": "127.475", "n_error": "15.145", "ppl": "1.73", "accuracy": "88.119", "wer": "11.881", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "234600", "lr": "6.65851e-05", "gnorm": "2.216", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "664687"} [2024-07-13 17:28:27,643][train_inner][INFO] - {"epoch": 2, "update": 1.557, "loss": "0.718", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "112.33", "wer_total": "126.78", "n_error": "14.45", "ppl": "1.64", "accuracy": "88.602", "wer": "11.398", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "234800", "lr": "6.63859e-05", "gnorm": "2.175", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "665052"} [2024-07-13 17:34:32,484][train_inner][INFO] - {"epoch": 2, "update": 1.558, "loss": "0.751", "ntokens": "126.33", "acc_total": "126.33", "n_correct": "111.445", "wer_total": "126.33", "n_error": "14.885", "ppl": "1.68", "accuracy": "88.217", "wer": "11.783", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "235000", "lr": "6.61873e-05", "gnorm": "2.285", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "665417"} [2024-07-13 17:34:32,484][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 18:16:22,301][valid][INFO] - {"epoch": 2, "valid_loss": "0.66", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2202", "valid_wer_total": "18.1585", "valid_n_error": "1.93797", "valid_ppl": "1.58", "valid_accuracy": "89.326", "valid_wer": "10.673", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "235000", "valid_best_accuracy": "89.326"} [2024-07-13 18:16:22,302][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 235000 updates [2024-07-13 18:16:22,302][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_235000.pt [2024-07-13 18:16:25,531][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_235000.pt [2024-07-13 18:16:29,784][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_235000.pt (epoch 2 @ 235000 updates, score 89.326) (writing took 7.482316882000305 seconds) [2024-07-13 18:22:34,556][train_inner][INFO] - {"epoch": 2, "update": 1.56, "loss": "0.751", "ntokens": "126.585", "acc_total": "126.585", "n_correct": "111.79", "wer_total": "126.585", "n_error": "14.795", "ppl": "1.68", "accuracy": "88.312", "wer": "11.688", "wps": "8.8", "ups": "0.07", "wpb": "126.6", "bsz": "8", "num_updates": "235200", "lr": "6.59893e-05", "gnorm": "2.293", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "668299"} [2024-07-13 18:28:39,781][train_inner][INFO] - {"epoch": 2, "update": 1.561, "loss": "0.749", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "112.745", "wer_total": "127.64", "n_error": "14.895", "ppl": "1.68", "accuracy": "88.33", "wer": "11.67", "wps": "69.9", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "235400", "lr": "6.57919e-05", "gnorm": "2.12", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "668664"} [2024-07-13 18:34:44,995][train_inner][INFO] - {"epoch": 2, "update": 1.562, "loss": "0.749", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "112.19", "wer_total": "126.86", "n_error": "14.67", "ppl": "1.68", "accuracy": "88.436", "wer": "11.564", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "235600", "lr": "6.55951e-05", "gnorm": "2.181", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "669029"} [2024-07-13 18:40:50,273][train_inner][INFO] - {"epoch": 2, "update": 1.564, "loss": "0.772", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "111.665", "wer_total": "126.515", "n_error": "14.85", "ppl": "1.71", "accuracy": "88.262", "wer": "11.738", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "235800", "lr": "6.53989e-05", "gnorm": "2.168", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "669394"} [2024-07-13 18:46:55,274][train_inner][INFO] - {"epoch": 2, "update": 1.565, "loss": "0.757", "ntokens": "127.07", "acc_total": "127.07", "n_correct": "112.115", "wer_total": "127.07", "n_error": "14.955", "ppl": "1.69", "accuracy": "88.231", "wer": "11.769", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "236000", "lr": "6.52033e-05", "gnorm": "2.245", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "669759"} [2024-07-13 18:53:00,363][train_inner][INFO] - {"epoch": 2, "update": 1.566, "loss": "0.752", "ntokens": "126.695", "acc_total": "126.695", "n_correct": "111.715", "wer_total": "126.695", "n_error": "14.98", "ppl": "1.68", "accuracy": "88.176", "wer": "11.824", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "236200", "lr": "6.50083e-05", "gnorm": "2.171", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "670124"} [2024-07-13 18:59:05,580][train_inner][INFO] - {"epoch": 2, "update": 1.568, "loss": "0.775", "ntokens": "126.65", "acc_total": "126.65", "n_correct": "111.72", "wer_total": "126.65", "n_error": "14.93", "ppl": "1.71", "accuracy": "88.212", "wer": "11.788", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "236400", "lr": "6.48138e-05", "gnorm": "2.378", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "670490"} [2024-07-13 19:05:10,573][train_inner][INFO] - {"epoch": 2, "update": 1.569, "loss": "0.726", "ntokens": "125.82", "acc_total": "125.82", "n_correct": "111.8", "wer_total": "125.82", "n_error": "14.015", "ppl": "1.65", "accuracy": "88.857", "wer": "11.139", "wps": "68.9", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "236600", "lr": "6.46199e-05", "gnorm": "2.181", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "670855"} [2024-07-13 19:11:15,749][train_inner][INFO] - {"epoch": 2, "update": 1.57, "loss": "0.78", "ntokens": "126.67", "acc_total": "126.67", "n_correct": "111.76", "wer_total": "126.67", "n_error": "14.905", "ppl": "1.72", "accuracy": "88.229", "wer": "11.767", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "236800", "lr": "6.44266e-05", "gnorm": "2.149", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "671220"} [2024-07-13 19:17:20,897][train_inner][INFO] - {"epoch": 2, "update": 1.572, "loss": "0.735", "ntokens": "126.55", "acc_total": "126.55", "n_correct": "111.975", "wer_total": "126.55", "n_error": "14.575", "ppl": "1.66", "accuracy": "88.483", "wer": "11.517", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "237000", "lr": "6.42339e-05", "gnorm": "2.177", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "671585"} [2024-07-13 19:23:26,038][train_inner][INFO] - {"epoch": 2, "update": 1.573, "loss": "0.745", "ntokens": "127.92", "acc_total": "127.92", "n_correct": "112.88", "wer_total": "127.92", "n_error": "15.04", "ppl": "1.68", "accuracy": "88.243", "wer": "11.757", "wps": "70.1", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "237200", "lr": "6.40418e-05", "gnorm": "2.167", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "671950"} [2024-07-13 19:29:31,415][train_inner][INFO] - {"epoch": 2, "update": 1.574, "loss": "0.776", "ntokens": "127.735", "acc_total": "127.735", "n_correct": "112.665", "wer_total": "127.735", "n_error": "15.07", "ppl": "1.71", "accuracy": "88.202", "wer": "11.798", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "237400", "lr": "6.38502e-05", "gnorm": "2.31", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "672315"} [2024-07-13 19:32:34,079][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 20:14:26,705][valid][INFO] - {"epoch": 2, "valid_loss": "0.653", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2328", "valid_wer_total": "18.1585", "valid_n_error": "1.92543", "valid_ppl": "1.57", "valid_accuracy": "89.395", "valid_wer": "10.603", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "237500", "valid_best_accuracy": "89.395"} [2024-07-13 20:14:26,706][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 237500 updates [2024-07-13 20:14:26,706][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_237500.pt [2024-07-13 20:14:29,956][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_237500.pt [2024-07-13 20:14:34,247][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_237500.pt (epoch 2 @ 237500 updates, score 89.395) (writing took 7.541401801048778 seconds) [2024-07-13 20:17:36,490][train_inner][INFO] - {"epoch": 2, "update": 1.576, "loss": "0.77", "ntokens": "127.74", "acc_total": "127.74", "n_correct": "112.39", "wer_total": "127.74", "n_error": "15.345", "ppl": "1.71", "accuracy": "87.983", "wer": "12.013", "wps": "8.9", "ups": "0.07", "wpb": "127.7", "bsz": "8", "num_updates": "237600", "lr": "6.36592e-05", "gnorm": "2.206", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "675201"} [2024-07-13 20:23:41,667][train_inner][INFO] - {"epoch": 2, "update": 1.577, "loss": "0.764", "ntokens": "127.23", "acc_total": "127.23", "n_correct": "112.13", "wer_total": "127.23", "n_error": "15.1", "ppl": "1.7", "accuracy": "88.132", "wer": "11.868", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "237800", "lr": "6.34688e-05", "gnorm": "2.134", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "675566"} [2024-07-13 20:26:42,587][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-13 20:29:49,129][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "0.775", "ntokens": "127.225", "acc_total": "127.225", "n_correct": "111.88", "wer_total": "127.225", "n_error": "15.345", "ppl": "1.71", "accuracy": "87.939", "wer": "12.061", "wps": "69.2", "ups": "0.54", "wpb": "127.2", "bsz": "8", "num_updates": "238000", "lr": "6.3279e-05", "gnorm": "2.275", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "675933"} [2024-07-13 20:35:54,904][train_inner][INFO] - {"epoch": 2, "update": 1.58, "loss": "0.771", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "112.43", "wer_total": "127.335", "n_error": "14.905", "ppl": "1.71", "accuracy": "88.295", "wer": "11.705", "wps": "69.6", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "238200", "lr": "6.30897e-05", "gnorm": "2.215", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "676299"} [2024-07-13 20:42:00,705][train_inner][INFO] - {"epoch": 2, "update": 1.581, "loss": "0.741", "ntokens": "126.64", "acc_total": "126.64", "n_correct": "112.02", "wer_total": "126.64", "n_error": "14.61", "ppl": "1.67", "accuracy": "88.455", "wer": "11.537", "wps": "69.2", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "238400", "lr": "6.2901e-05", "gnorm": "2.244", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "676665"} [2024-07-13 20:48:06,733][train_inner][INFO] - {"epoch": 2, "update": 1.582, "loss": "0.764", "ntokens": "127", "acc_total": "127", "n_correct": "112.1", "wer_total": "127", "n_error": "14.9", "ppl": "1.7", "accuracy": "88.268", "wer": "11.732", "wps": "69.4", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "238600", "lr": "6.27128e-05", "gnorm": "2.246", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "677031"} [2024-07-13 20:54:12,745][train_inner][INFO] - {"epoch": 2, "update": 1.584, "loss": "0.722", "ntokens": "127.92", "acc_total": "127.92", "n_correct": "113.41", "wer_total": "127.92", "n_error": "14.51", "ppl": "1.65", "accuracy": "88.657", "wer": "11.343", "wps": "69.9", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "238800", "lr": "6.25252e-05", "gnorm": "2.091", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "677397"} [2024-07-13 21:00:18,332][train_inner][INFO] - {"epoch": 2, "update": 1.585, "loss": "0.708", "ntokens": "126.57", "acc_total": "126.57", "n_correct": "112.74", "wer_total": "126.57", "n_error": "13.83", "ppl": "1.63", "accuracy": "89.073", "wer": "10.927", "wps": "69.2", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "239000", "lr": "6.23382e-05", "gnorm": "2.207", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "677762"} [2024-07-13 21:06:24,069][train_inner][INFO] - {"epoch": 2, "update": 1.586, "loss": "0.774", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "112.005", "wer_total": "126.705", "n_error": "14.7", "ppl": "1.71", "accuracy": "88.398", "wer": "11.602", "wps": "69.3", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "239200", "lr": "6.21517e-05", "gnorm": "2.218", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "678128"} [2024-07-13 21:12:29,732][train_inner][INFO] - {"epoch": 2, "update": 1.588, "loss": "0.751", "ntokens": "126.29", "acc_total": "126.29", "n_correct": "111.43", "wer_total": "126.29", "n_error": "14.855", "ppl": "1.68", "accuracy": "88.233", "wer": "11.763", "wps": "69.1", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "239400", "lr": "6.19658e-05", "gnorm": "2.142", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "678494"} [2024-07-13 21:18:35,699][train_inner][INFO] - {"epoch": 2, "update": 1.589, "loss": "0.75", "ntokens": "127.195", "acc_total": "127.195", "n_correct": "112.36", "wer_total": "127.195", "n_error": "14.83", "ppl": "1.68", "accuracy": "88.337", "wer": "11.659", "wps": "69.5", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "239600", "lr": "6.17805e-05", "gnorm": "2.149", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "678860"} [2024-07-13 21:24:41,344][train_inner][INFO] - {"epoch": 2, "update": 1.59, "loss": "0.76", "ntokens": "127.3", "acc_total": "127.3", "n_correct": "112.345", "wer_total": "127.3", "n_error": "14.955", "ppl": "1.69", "accuracy": "88.252", "wer": "11.748", "wps": "69.6", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "239800", "lr": "6.15956e-05", "gnorm": "2.162", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "679225"} [2024-07-13 21:30:45,415][train_inner][INFO] - {"epoch": 2, "update": 1.592, "loss": "0.759", "ntokens": "126.615", "acc_total": "126.615", "n_correct": "111.74", "wer_total": "126.615", "n_error": "14.875", "ppl": "1.69", "accuracy": "88.252", "wer": "11.748", "wps": "69.6", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "240000", "lr": "6.14114e-05", "gnorm": "2.195", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "679589"} [2024-07-13 21:30:45,415][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-13 22:12:25,996][valid][INFO] - {"epoch": 2, "valid_loss": "0.654", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2398", "valid_wer_total": "18.1585", "valid_n_error": "1.91847", "valid_ppl": "1.57", "valid_accuracy": "89.434", "valid_wer": "10.565", "valid_wps": "174.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "240000", "valid_best_accuracy": "89.434"} [2024-07-13 22:12:25,997][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 240000 updates [2024-07-13 22:12:25,997][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_240000.pt [2024-07-13 22:12:29,243][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_240000.pt [2024-07-13 22:12:33,499][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_240000.pt (epoch 2 @ 240000 updates, score 89.434) (writing took 7.502469722065143 seconds) [2024-07-13 22:18:36,715][train_inner][INFO] - {"epoch": 2, "update": 1.593, "loss": "0.747", "ntokens": "127.035", "acc_total": "127.035", "n_correct": "112.58", "wer_total": "127.035", "n_error": "14.445", "ppl": "1.68", "accuracy": "88.621", "wer": "11.371", "wps": "8.8", "ups": "0.07", "wpb": "127", "bsz": "8", "num_updates": "240200", "lr": "6.12277e-05", "gnorm": "2.281", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "682461"} [2024-07-13 22:24:40,306][train_inner][INFO] - {"epoch": 2, "update": 1.594, "loss": "0.763", "ntokens": "126.435", "acc_total": "126.435", "n_correct": "111.545", "wer_total": "126.435", "n_error": "14.89", "ppl": "1.7", "accuracy": "88.223", "wer": "11.777", "wps": "69.5", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "240400", "lr": "6.10446e-05", "gnorm": "2.103", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "682824"} [2024-07-13 22:30:43,812][train_inner][INFO] - {"epoch": 2, "update": 1.596, "loss": "0.738", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "111.94", "wer_total": "126.305", "n_error": "14.365", "ppl": "1.67", "accuracy": "88.627", "wer": "11.373", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "240600", "lr": "6.0862e-05", "gnorm": "2.111", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "683188"} [2024-07-13 22:36:47,415][train_inner][INFO] - {"epoch": 2, "update": 1.597, "loss": "0.768", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "112.725", "wer_total": "127.785", "n_error": "15.05", "ppl": "1.7", "accuracy": "88.215", "wer": "11.778", "wps": "70.3", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "240800", "lr": "6.06799e-05", "gnorm": "2.167", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "683551"} [2024-07-13 22:42:50,558][train_inner][INFO] - {"epoch": 2, "update": 1.598, "loss": "0.739", "ntokens": "127.05", "acc_total": "127.05", "n_correct": "112.65", "wer_total": "127.05", "n_error": "14.395", "ppl": "1.67", "accuracy": "88.666", "wer": "11.33", "wps": "70", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "241000", "lr": "6.04984e-05", "gnorm": "2.163", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "683915"} [2024-07-13 22:48:54,049][train_inner][INFO] - {"epoch": 2, "update": 1.6, "loss": "0.775", "ntokens": "128.235", "acc_total": "128.235", "n_correct": "112.625", "wer_total": "128.235", "n_error": "15.61", "ppl": "1.71", "accuracy": "87.827", "wer": "12.173", "wps": "70.6", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "241200", "lr": "6.03174e-05", "gnorm": "2.141", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "684278"} [2024-07-13 22:53:44,604][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-13 22:54:59,072][train_inner][INFO] - {"epoch": 2, "update": 1.601, "loss": "0.72", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "112.825", "wer_total": "127.245", "n_error": "14.42", "ppl": "1.65", "accuracy": "88.668", "wer": "11.332", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "241400", "lr": "6.0137e-05", "gnorm": "2.131", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "684643"} [2024-07-13 23:01:02,273][train_inner][INFO] - {"epoch": 2, "update": 1.602, "loss": "0.719", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "112.755", "wer_total": "127.055", "n_error": "14.295", "ppl": "1.65", "accuracy": "88.745", "wer": "11.251", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "241600", "lr": "5.99571e-05", "gnorm": "2.222", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "685006"} [2024-07-13 23:07:05,610][train_inner][INFO] - {"epoch": 2, "update": 1.604, "loss": "0.776", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "111.835", "wer_total": "126.815", "n_error": "14.98", "ppl": "1.71", "accuracy": "88.188", "wer": "11.812", "wps": "69.8", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "241800", "lr": "5.97778e-05", "gnorm": "2.226", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "685370"} [2024-07-13 23:13:09,027][train_inner][INFO] - {"epoch": 2, "update": 1.605, "loss": "0.757", "ntokens": "126.525", "acc_total": "126.525", "n_correct": "111.775", "wer_total": "126.525", "n_error": "14.74", "ppl": "1.69", "accuracy": "88.342", "wer": "11.65", "wps": "69.6", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "242000", "lr": "5.9599e-05", "gnorm": "2.081", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "685733"} [2024-07-13 23:19:12,198][train_inner][INFO] - {"epoch": 2, "update": 1.606, "loss": "0.712", "ntokens": "127.29", "acc_total": "127.29", "n_correct": "113.045", "wer_total": "127.29", "n_error": "14.245", "ppl": "1.64", "accuracy": "88.809", "wer": "11.191", "wps": "70.1", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "242200", "lr": "5.94207e-05", "gnorm": "2.141", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "686096"} [2024-07-13 23:25:15,483][train_inner][INFO] - {"epoch": 2, "update": 1.607, "loss": "0.751", "ntokens": "127.195", "acc_total": "127.195", "n_correct": "112.66", "wer_total": "127.195", "n_error": "14.525", "ppl": "1.68", "accuracy": "88.573", "wer": "11.419", "wps": "70", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "242400", "lr": "5.92429e-05", "gnorm": "2.33", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "686460"} [2024-07-13 23:28:16,964][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 00:09:53,702][valid][INFO] - {"epoch": 2, "valid_loss": "0.653", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.242", "valid_wer_total": "18.1585", "valid_n_error": "1.91626", "valid_ppl": "1.57", "valid_accuracy": "89.446", "valid_wer": "10.553", "valid_wps": "174.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "242500", "valid_best_accuracy": "89.446"} [2024-07-14 00:09:53,703][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 242500 updates [2024-07-14 00:09:53,703][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_242500.pt [2024-07-14 00:09:56,943][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_242500.pt [2024-07-14 00:10:01,221][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_242500.pt (epoch 2 @ 242500 updates, score 89.446) (writing took 7.517909785965458 seconds) [2024-07-14 00:13:02,558][train_inner][INFO] - {"epoch": 2, "update": 1.609, "loss": "0.727", "ntokens": "127.805", "acc_total": "127.805", "n_correct": "113.415", "wer_total": "127.805", "n_error": "14.38", "ppl": "1.66", "accuracy": "88.741", "wer": "11.252", "wps": "8.9", "ups": "0.07", "wpb": "127.8", "bsz": "8", "num_updates": "242600", "lr": "5.90657e-05", "gnorm": "2.127", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "689327"} [2024-07-14 00:19:06,054][train_inner][INFO] - {"epoch": 2, "update": 1.61, "loss": "0.777", "ntokens": "127.475", "acc_total": "127.475", "n_correct": "112.685", "wer_total": "127.475", "n_error": "14.785", "ppl": "1.71", "accuracy": "88.398", "wer": "11.598", "wps": "70.1", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "242800", "lr": "5.88891e-05", "gnorm": "2.209", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "689690"} [2024-07-14 00:25:09,317][train_inner][INFO] - {"epoch": 2, "update": 1.611, "loss": "0.75", "ntokens": "126.75", "acc_total": "126.75", "n_correct": "112.46", "wer_total": "126.75", "n_error": "14.285", "ppl": "1.68", "accuracy": "88.726", "wer": "11.27", "wps": "69.8", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "243000", "lr": "5.87129e-05", "gnorm": "2.11", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "690053"} [2024-07-14 00:31:12,526][train_inner][INFO] - {"epoch": 2, "update": 1.613, "loss": "0.751", "ntokens": "126.965", "acc_total": "126.965", "n_correct": "112.46", "wer_total": "126.965", "n_error": "14.505", "ppl": "1.68", "accuracy": "88.576", "wer": "11.424", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "243200", "lr": "5.85373e-05", "gnorm": "2.168", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "690417"} [2024-07-14 00:31:23,320][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-14 00:37:17,395][train_inner][INFO] - {"epoch": 2, "update": 1.614, "loss": "0.774", "ntokens": "127.5", "acc_total": "127.5", "n_correct": "112.49", "wer_total": "127.5", "n_error": "15.01", "ppl": "1.71", "accuracy": "88.227", "wer": "11.773", "wps": "69.9", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "243400", "lr": "5.83622e-05", "gnorm": "2.292", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "690781"} [2024-07-14 00:43:20,821][train_inner][INFO] - {"epoch": 2, "update": 1.615, "loss": "0.752", "ntokens": "125.95", "acc_total": "125.95", "n_correct": "111.48", "wer_total": "125.95", "n_error": "14.465", "ppl": "1.68", "accuracy": "88.511", "wer": "11.485", "wps": "69.3", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "243600", "lr": "5.81876e-05", "gnorm": "2.387", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "691145"} [2024-07-14 00:49:24,087][train_inner][INFO] - {"epoch": 2, "update": 1.617, "loss": "0.733", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "112.58", "wer_total": "126.92", "n_error": "14.34", "ppl": "1.66", "accuracy": "88.702", "wer": "11.298", "wps": "69.9", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "243800", "lr": "5.80135e-05", "gnorm": "2.157", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "691508"} [2024-07-14 00:55:27,446][train_inner][INFO] - {"epoch": 2, "update": 1.618, "loss": "0.732", "ntokens": "126.565", "acc_total": "126.565", "n_correct": "112.57", "wer_total": "126.565", "n_error": "13.995", "ppl": "1.66", "accuracy": "88.942", "wer": "11.058", "wps": "69.7", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "244000", "lr": "5.784e-05", "gnorm": "2.102", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "691872"} [2024-07-14 01:01:31,008][train_inner][INFO] - {"epoch": 2, "update": 1.619, "loss": "0.724", "ntokens": "126.98", "acc_total": "126.98", "n_correct": "112.965", "wer_total": "126.98", "n_error": "14.015", "ppl": "1.65", "accuracy": "88.963", "wer": "11.037", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "244200", "lr": "5.7667e-05", "gnorm": "2.172", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "692235"} [2024-07-14 01:07:34,116][train_inner][INFO] - {"epoch": 2, "update": 1.621, "loss": "0.734", "ntokens": "127.605", "acc_total": "127.605", "n_correct": "113.18", "wer_total": "127.605", "n_error": "14.425", "ppl": "1.66", "accuracy": "88.696", "wer": "11.304", "wps": "70.3", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "244400", "lr": "5.74945e-05", "gnorm": "2.154", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "692598"} [2024-07-14 01:13:37,465][train_inner][INFO] - {"epoch": 2, "update": 1.622, "loss": "0.737", "ntokens": "127.675", "acc_total": "127.675", "n_correct": "113.235", "wer_total": "127.675", "n_error": "14.44", "ppl": "1.67", "accuracy": "88.69", "wer": "11.31", "wps": "70.3", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "244600", "lr": "5.73225e-05", "gnorm": "2.127", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "692962"} [2024-07-14 01:19:40,294][train_inner][INFO] - {"epoch": 2, "update": 1.623, "loss": "0.736", "ntokens": "126.44", "acc_total": "126.44", "n_correct": "112.16", "wer_total": "126.44", "n_error": "14.275", "ppl": "1.67", "accuracy": "88.706", "wer": "11.29", "wps": "69.7", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "244800", "lr": "5.71511e-05", "gnorm": "2.132", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "693324"} [2024-07-14 01:25:43,403][train_inner][INFO] - {"epoch": 2, "update": 1.625, "loss": "0.734", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "112.585", "wer_total": "126.905", "n_error": "14.32", "ppl": "1.66", "accuracy": "88.716", "wer": "11.284", "wps": "69.9", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "245000", "lr": "5.69801e-05", "gnorm": "2.229", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "693687"} [2024-07-14 01:25:43,403][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 02:07:19,167][valid][INFO] - {"epoch": 2, "valid_loss": "0.649", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2511", "valid_wer_total": "18.1585", "valid_n_error": "1.90738", "valid_ppl": "1.57", "valid_accuracy": "89.496", "valid_wer": "10.504", "valid_wps": "174.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "245000", "valid_best_accuracy": "89.496"} [2024-07-14 02:07:19,168][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 245000 updates [2024-07-14 02:07:19,168][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_245000.pt [2024-07-14 02:07:22,388][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_245000.pt [2024-07-14 02:07:26,679][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_245000.pt (epoch 2 @ 245000 updates, score 89.496) (writing took 7.511699378024787 seconds) [2024-07-14 02:13:29,615][train_inner][INFO] - {"epoch": 2, "update": 1.626, "loss": "0.763", "ntokens": "128.225", "acc_total": "128.225", "n_correct": "113.4", "wer_total": "128.225", "n_error": "14.825", "ppl": "1.7", "accuracy": "88.438", "wer": "11.562", "wps": "8.9", "ups": "0.07", "wpb": "128.2", "bsz": "8", "num_updates": "245200", "lr": "5.68097e-05", "gnorm": "2.216", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "696554"} [2024-07-14 02:19:32,868][train_inner][INFO] - {"epoch": 2, "update": 1.627, "loss": "0.747", "ntokens": "126.32", "acc_total": "126.32", "n_correct": "111.685", "wer_total": "126.32", "n_error": "14.635", "ppl": "1.68", "accuracy": "88.414", "wer": "11.586", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "245400", "lr": "5.66397e-05", "gnorm": "2.298", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "696917"} [2024-07-14 02:25:35,890][train_inner][INFO] - {"epoch": 2, "update": 1.629, "loss": "0.765", "ntokens": "125.76", "acc_total": "125.76", "n_correct": "111.08", "wer_total": "125.76", "n_error": "14.68", "ppl": "1.7", "accuracy": "88.327", "wer": "11.673", "wps": "69.3", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "245600", "lr": "5.64703e-05", "gnorm": "2.098", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "697280"} [2024-07-14 02:31:39,106][train_inner][INFO] - {"epoch": 2, "update": 1.63, "loss": "0.796", "ntokens": "126.49", "acc_total": "126.49", "n_correct": "111.205", "wer_total": "126.49", "n_error": "15.285", "ppl": "1.74", "accuracy": "87.916", "wer": "12.084", "wps": "69.7", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "245800", "lr": "5.63014e-05", "gnorm": "2.391", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "697643"} [2024-07-14 02:37:42,228][train_inner][INFO] - {"epoch": 2, "update": 1.631, "loss": "0.783", "ntokens": "126.265", "acc_total": "126.265", "n_correct": "111.255", "wer_total": "126.265", "n_error": "15.01", "ppl": "1.72", "accuracy": "88.112", "wer": "11.888", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "246000", "lr": "5.6133e-05", "gnorm": "2.115", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "698006"} [2024-07-14 02:43:45,250][train_inner][INFO] - {"epoch": 2, "update": 1.633, "loss": "0.726", "ntokens": "127.295", "acc_total": "127.295", "n_correct": "112.895", "wer_total": "127.295", "n_error": "14.4", "ppl": "1.65", "accuracy": "88.688", "wer": "11.312", "wps": "70.1", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "246200", "lr": "5.59651e-05", "gnorm": "1.991", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "698369"} [2024-07-14 02:49:48,549][train_inner][INFO] - {"epoch": 2, "update": 1.634, "loss": "0.742", "ntokens": "127.505", "acc_total": "127.505", "n_correct": "112.75", "wer_total": "127.505", "n_error": "14.75", "ppl": "1.67", "accuracy": "88.428", "wer": "11.568", "wps": "70.2", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "246400", "lr": "5.57977e-05", "gnorm": "2.116", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "698733"} [2024-07-14 02:55:51,848][train_inner][INFO] - {"epoch": 2, "update": 1.635, "loss": "0.731", "ntokens": "127.61", "acc_total": "127.61", "n_correct": "113.665", "wer_total": "127.61", "n_error": "13.945", "ppl": "1.66", "accuracy": "89.072", "wer": "10.928", "wps": "70.3", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "246600", "lr": "5.56308e-05", "gnorm": "2.238", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "699096"} [2024-07-14 02:59:24,178][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-14 03:01:56,719][train_inner][INFO] - {"epoch": 2, "update": 1.637, "loss": "0.729", "ntokens": "126.54", "acc_total": "126.54", "n_correct": "112.065", "wer_total": "126.54", "n_error": "14.475", "ppl": "1.66", "accuracy": "88.561", "wer": "11.439", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "246800", "lr": "5.54644e-05", "gnorm": "2.134", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "699461"} [2024-07-14 03:07:59,640][train_inner][INFO] - {"epoch": 2, "update": 1.638, "loss": "0.71", "ntokens": "126.665", "acc_total": "126.665", "n_correct": "112.765", "wer_total": "126.665", "n_error": "13.9", "ppl": "1.64", "accuracy": "89.026", "wer": "10.974", "wps": "69.8", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "247000", "lr": "5.52984e-05", "gnorm": "2.104", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "699824"} [2024-07-14 03:14:02,686][train_inner][INFO] - {"epoch": 2, "update": 1.639, "loss": "0.723", "ntokens": "127.555", "acc_total": "127.555", "n_correct": "113.66", "wer_total": "127.555", "n_error": "13.895", "ppl": "1.65", "accuracy": "89.107", "wer": "10.893", "wps": "70.3", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "247200", "lr": "5.5133e-05", "gnorm": "2.209", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "700187"} [2024-07-14 03:20:05,733][train_inner][INFO] - {"epoch": 2, "update": 1.641, "loss": "0.757", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "111.7", "wer_total": "126.705", "n_error": "15.005", "ppl": "1.69", "accuracy": "88.158", "wer": "11.842", "wps": "69.8", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "247400", "lr": "5.49681e-05", "gnorm": "2.274", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "700550"} [2024-07-14 03:23:07,269][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 04:04:40,748][valid][INFO] - {"epoch": 2, "valid_loss": "0.646", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2563", "valid_wer_total": "18.1585", "valid_n_error": "1.90204", "valid_ppl": "1.56", "valid_accuracy": "89.524", "valid_wer": "10.475", "valid_wps": "174.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "247500", "valid_best_accuracy": "89.524"} [2024-07-14 04:04:40,749][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 247500 updates [2024-07-14 04:04:40,749][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_247500.pt [2024-07-14 04:04:43,984][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_247500.pt [2024-07-14 04:04:48,297][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_247500.pt (epoch 2 @ 247500 updates, score 89.524) (writing took 7.548868204001337 seconds) [2024-07-14 04:07:49,437][train_inner][INFO] - {"epoch": 2, "update": 1.642, "loss": "0.744", "ntokens": "126.85", "acc_total": "126.85", "n_correct": "112.21", "wer_total": "126.85", "n_error": "14.64", "ppl": "1.68", "accuracy": "88.459", "wer": "11.541", "wps": "8.9", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "247600", "lr": "5.48037e-05", "gnorm": "2.162", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "703413"} [2024-07-14 04:13:52,428][train_inner][INFO] - {"epoch": 2, "update": 1.643, "loss": "0.766", "ntokens": "127.365", "acc_total": "127.365", "n_correct": "112.23", "wer_total": "127.365", "n_error": "15.13", "ppl": "1.7", "accuracy": "88.117", "wer": "11.879", "wps": "70.2", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "247800", "lr": "5.46398e-05", "gnorm": "2.224", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "703776"} [2024-07-14 04:19:55,405][train_inner][INFO] - {"epoch": 2, "update": 1.645, "loss": "0.723", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "113.075", "wer_total": "127.17", "n_error": "14.09", "ppl": "1.65", "accuracy": "88.916", "wer": "11.08", "wps": "70.1", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "248000", "lr": "5.44763e-05", "gnorm": "2.057", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "704139"} [2024-07-14 04:25:58,277][train_inner][INFO] - {"epoch": 2, "update": 1.646, "loss": "0.73", "ntokens": "127.675", "acc_total": "127.675", "n_correct": "112.88", "wer_total": "127.675", "n_error": "14.785", "ppl": "1.66", "accuracy": "88.412", "wer": "11.58", "wps": "70.4", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "248200", "lr": "5.43134e-05", "gnorm": "2.157", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "704502"} [2024-07-14 04:32:01,161][train_inner][INFO] - {"epoch": 2, "update": 1.647, "loss": "0.735", "ntokens": "126.8", "acc_total": "126.8", "n_correct": "112.385", "wer_total": "126.8", "n_error": "14.41", "ppl": "1.66", "accuracy": "88.632", "wer": "11.364", "wps": "69.9", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "248400", "lr": "5.41509e-05", "gnorm": "2.087", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "704865"} [2024-07-14 04:38:03,939][train_inner][INFO] - {"epoch": 2, "update": 1.649, "loss": "0.743", "ntokens": "127.01", "acc_total": "127.01", "n_correct": "112.39", "wer_total": "127.01", "n_error": "14.62", "ppl": "1.67", "accuracy": "88.489", "wer": "11.511", "wps": "70", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "248600", "lr": "5.39889e-05", "gnorm": "2.238", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "705228"} [2024-07-14 04:44:06,856][train_inner][INFO] - {"epoch": 2, "update": 1.65, "loss": "0.742", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "111.875", "wer_total": "126.46", "n_error": "14.585", "ppl": "1.67", "accuracy": "88.467", "wer": "11.533", "wps": "69.7", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "248800", "lr": "5.38274e-05", "gnorm": "2.114", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "705591"} [2024-07-14 04:50:09,796][train_inner][INFO] - {"epoch": 2, "update": 1.651, "loss": "0.755", "ntokens": "125.14", "acc_total": "125.14", "n_correct": "110.645", "wer_total": "125.14", "n_error": "14.495", "ppl": "1.69", "accuracy": "88.417", "wer": "11.583", "wps": "69", "ups": "0.55", "wpb": "125.1", "bsz": "8", "num_updates": "249000", "lr": "5.36664e-05", "gnorm": "2.107", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "705954"} [2024-07-14 04:56:12,815][train_inner][INFO] - {"epoch": 2, "update": 1.653, "loss": "0.75", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "112.415", "wer_total": "127.345", "n_error": "14.925", "ppl": "1.68", "accuracy": "88.276", "wer": "11.72", "wps": "70.2", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "249200", "lr": "5.35059e-05", "gnorm": "2.103", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "706317"} [2024-07-14 05:02:15,806][train_inner][INFO] - {"epoch": 2, "update": 1.654, "loss": "0.752", "ntokens": "126.38", "acc_total": "126.38", "n_correct": "111.535", "wer_total": "126.38", "n_error": "14.845", "ppl": "1.68", "accuracy": "88.254", "wer": "11.746", "wps": "69.6", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "249400", "lr": "5.33458e-05", "gnorm": "2.084", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "706680"} [2024-07-14 05:08:18,682][train_inner][INFO] - {"epoch": 2, "update": 1.655, "loss": "0.745", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "112.89", "wer_total": "127.485", "n_error": "14.595", "ppl": "1.68", "accuracy": "88.552", "wer": "11.448", "wps": "70.3", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "249600", "lr": "5.31863e-05", "gnorm": "2.096", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "707043"} [2024-07-14 05:14:21,655][train_inner][INFO] - {"epoch": 2, "update": 1.657, "loss": "0.748", "ntokens": "127.615", "acc_total": "127.615", "n_correct": "112.765", "wer_total": "127.615", "n_error": "14.85", "ppl": "1.68", "accuracy": "88.363", "wer": "11.637", "wps": "70.3", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "249800", "lr": "5.30272e-05", "gnorm": "2.062", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "707406"} [2024-07-14 05:20:24,625][train_inner][INFO] - {"epoch": 2, "update": 1.658, "loss": "0.719", "ntokens": "126.415", "acc_total": "126.415", "n_correct": "112.345", "wer_total": "126.415", "n_error": "14.07", "ppl": "1.65", "accuracy": "88.87", "wer": "11.13", "wps": "69.7", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "250000", "lr": "5.28686e-05", "gnorm": "2.056", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "707769"} [2024-07-14 05:20:24,626][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 06:02:00,326][valid][INFO] - {"epoch": 2, "valid_loss": "0.641", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2657", "valid_wer_total": "18.1585", "valid_n_error": "1.89262", "valid_ppl": "1.56", "valid_accuracy": "89.577", "valid_wer": "10.423", "valid_wps": "174.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "250000", "valid_best_accuracy": "89.577"} [2024-07-14 06:02:00,327][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 250000 updates [2024-07-14 06:02:00,327][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_250000.pt [2024-07-14 06:02:03,536][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_250000.pt [2024-07-14 06:02:07,903][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_250000.pt (epoch 2 @ 250000 updates, score 89.577) (writing took 7.575633786036633 seconds) [2024-07-14 06:08:10,521][train_inner][INFO] - {"epoch": 2, "update": 1.659, "loss": "0.743", "ntokens": "125.805", "acc_total": "125.805", "n_correct": "111.325", "wer_total": "125.805", "n_error": "14.48", "ppl": "1.67", "accuracy": "88.49", "wer": "11.51", "wps": "8.8", "ups": "0.07", "wpb": "125.8", "bsz": "8", "num_updates": "250200", "lr": "5.27104e-05", "gnorm": "2.065", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "710635"} [2024-07-14 06:14:13,593][train_inner][INFO] - {"epoch": 2, "update": 1.661, "loss": "0.742", "ntokens": "127.535", "acc_total": "127.535", "n_correct": "113.11", "wer_total": "127.535", "n_error": "14.425", "ppl": "1.67", "accuracy": "88.689", "wer": "11.311", "wps": "70.3", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "250400", "lr": "5.25528e-05", "gnorm": "2.151", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "710998"} [2024-07-14 06:20:16,429][train_inner][INFO] - {"epoch": 2, "update": 1.662, "loss": "0.733", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "112.785", "wer_total": "127.32", "n_error": "14.535", "ppl": "1.66", "accuracy": "88.584", "wer": "11.416", "wps": "70.2", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "250600", "lr": "5.23956e-05", "gnorm": "2.237", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "711360"} [2024-07-14 06:26:19,739][train_inner][INFO] - {"epoch": 2, "update": 1.663, "loss": "0.757", "ntokens": "127.29", "acc_total": "127.29", "n_correct": "112.375", "wer_total": "127.29", "n_error": "14.91", "ppl": "1.69", "accuracy": "88.283", "wer": "11.713", "wps": "70.1", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "250800", "lr": "5.22388e-05", "gnorm": "2.116", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "711724"} [2024-07-14 06:32:22,699][train_inner][INFO] - {"epoch": 2, "update": 1.665, "loss": "0.757", "ntokens": "127.975", "acc_total": "127.975", "n_correct": "113.355", "wer_total": "127.975", "n_error": "14.62", "ppl": "1.69", "accuracy": "88.576", "wer": "11.424", "wps": "70.5", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "251000", "lr": "5.20826e-05", "gnorm": "2.168", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "712087"} [2024-07-14 06:38:25,851][train_inner][INFO] - {"epoch": 2, "update": 1.666, "loss": "0.704", "ntokens": "127.23", "acc_total": "127.23", "n_correct": "113.58", "wer_total": "127.23", "n_error": "13.645", "ppl": "1.63", "accuracy": "89.271", "wer": "10.725", "wps": "70.1", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "251200", "lr": "5.19268e-05", "gnorm": "2.07", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "712450"} [2024-07-14 06:44:28,821][train_inner][INFO] - {"epoch": 2, "update": 1.667, "loss": "0.76", "ntokens": "126.985", "acc_total": "126.985", "n_correct": "112.355", "wer_total": "126.985", "n_error": "14.625", "ppl": "1.69", "accuracy": "88.479", "wer": "11.517", "wps": "70", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "251400", "lr": "5.17714e-05", "gnorm": "2.131", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "712813"} [2024-07-14 06:50:31,850][train_inner][INFO] - {"epoch": 2, "update": 1.668, "loss": "0.727", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "112.54", "wer_total": "126.93", "n_error": "14.39", "ppl": "1.65", "accuracy": "88.663", "wer": "11.337", "wps": "69.9", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "251600", "lr": "5.16166e-05", "gnorm": "2.174", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "713176"} [2024-07-14 06:56:35,441][train_inner][INFO] - {"epoch": 2, "update": 1.67, "loss": "0.712", "ntokens": "126.715", "acc_total": "126.715", "n_correct": "112.745", "wer_total": "126.715", "n_error": "13.97", "ppl": "1.64", "accuracy": "88.975", "wer": "11.025", "wps": "69.7", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "251800", "lr": "5.14622e-05", "gnorm": "2.074", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "713539"} [2024-07-14 07:02:38,763][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "0.727", "ntokens": "127.93", "acc_total": "127.93", "n_correct": "113.66", "wer_total": "127.93", "n_error": "14.27", "ppl": "1.66", "accuracy": "88.845", "wer": "11.155", "wps": "70.4", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "252000", "lr": "5.13083e-05", "gnorm": "2.154", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "713903"} [2024-07-14 07:08:41,684][train_inner][INFO] - {"epoch": 2, "update": 1.672, "loss": "0.72", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "113.17", "wer_total": "127.1", "n_error": "13.93", "ppl": "1.65", "accuracy": "89.04", "wer": "10.96", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "252200", "lr": "5.11548e-05", "gnorm": "2.024", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "714266"} [2024-07-14 07:14:45,645][train_inner][INFO] - {"epoch": 2, "update": 1.674, "loss": "0.725", "ntokens": "125.91", "acc_total": "125.91", "n_correct": "111.885", "wer_total": "125.91", "n_error": "14.025", "ppl": "1.65", "accuracy": "88.861", "wer": "11.139", "wps": "69.2", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "252400", "lr": "5.10018e-05", "gnorm": "2.095", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "714630"} [2024-07-14 07:17:47,202][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 07:59:22,076][valid][INFO] - {"epoch": 2, "valid_loss": "0.643", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2622", "valid_wer_total": "18.1585", "valid_n_error": "1.89617", "valid_ppl": "1.56", "valid_accuracy": "89.557", "valid_wer": "10.442", "valid_wps": "174.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "252500", "valid_best_accuracy": "89.577"} [2024-07-14 07:59:22,077][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 252500 updates [2024-07-14 07:59:22,077][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_252500.pt [2024-07-14 07:59:25,307][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_252500.pt [2024-07-14 07:59:27,480][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_252500.pt (epoch 2 @ 252500 updates, score 89.557) (writing took 5.403012417955324 seconds) [2024-07-14 08:01:43,510][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-14 08:02:30,931][train_inner][INFO] - {"epoch": 2, "update": 1.675, "loss": "0.732", "ntokens": "127.86", "acc_total": "127.86", "n_correct": "113.725", "wer_total": "127.86", "n_error": "14.13", "ppl": "1.66", "accuracy": "88.945", "wer": "11.051", "wps": "8.9", "ups": "0.07", "wpb": "127.9", "bsz": "8", "num_updates": "252600", "lr": "5.08492e-05", "gnorm": "2.172", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "717495"} [2024-07-14 08:08:34,619][train_inner][INFO] - {"epoch": 2, "update": 1.676, "loss": "0.785", "ntokens": "126.28", "acc_total": "126.28", "n_correct": "111.305", "wer_total": "126.28", "n_error": "14.975", "ppl": "1.72", "accuracy": "88.141", "wer": "11.859", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "252800", "lr": "5.06971e-05", "gnorm": "2.144", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "717859"} [2024-07-14 08:14:38,619][train_inner][INFO] - {"epoch": 2, "update": 1.678, "loss": "0.744", "ntokens": "128.045", "acc_total": "128.045", "n_correct": "113.08", "wer_total": "128.045", "n_error": "14.965", "ppl": "1.68", "accuracy": "88.313", "wer": "11.687", "wps": "70.4", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "253000", "lr": "5.05454e-05", "gnorm": "2.117", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "718223"} [2024-07-14 08:20:42,749][train_inner][INFO] - {"epoch": 2, "update": 1.679, "loss": "0.733", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "112.255", "wer_total": "126.305", "n_error": "14.045", "ppl": "1.66", "accuracy": "88.876", "wer": "11.12", "wps": "69.4", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "253200", "lr": "5.03943e-05", "gnorm": "2.168", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "718587"} [2024-07-14 08:26:46,673][train_inner][INFO] - {"epoch": 2, "update": 1.68, "loss": "0.71", "ntokens": "127.3", "acc_total": "127.3", "n_correct": "113.235", "wer_total": "127.3", "n_error": "14.065", "ppl": "1.64", "accuracy": "88.951", "wer": "11.049", "wps": "70", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "253400", "lr": "5.02435e-05", "gnorm": "2.097", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "718951"} [2024-07-14 08:32:50,647][train_inner][INFO] - {"epoch": 2, "update": 1.682, "loss": "0.717", "ntokens": "125.845", "acc_total": "125.845", "n_correct": "111.845", "wer_total": "125.845", "n_error": "14", "ppl": "1.64", "accuracy": "88.875", "wer": "11.125", "wps": "69.2", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "253600", "lr": "5.00932e-05", "gnorm": "2.14", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "719315"} [2024-07-14 08:38:54,820][train_inner][INFO] - {"epoch": 2, "update": 1.683, "loss": "0.719", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "112.02", "wer_total": "126.01", "n_error": "13.985", "ppl": "1.65", "accuracy": "88.898", "wer": "11.098", "wps": "69.2", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "253800", "lr": "4.99434e-05", "gnorm": "2.069", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "719679"} [2024-07-14 08:44:59,181][train_inner][INFO] - {"epoch": 2, "update": 1.684, "loss": "0.737", "ntokens": "127.24", "acc_total": "127.24", "n_correct": "112.955", "wer_total": "127.24", "n_error": "14.285", "ppl": "1.67", "accuracy": "88.773", "wer": "11.227", "wps": "69.8", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "254000", "lr": "4.9794e-05", "gnorm": "2.149", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "720043"} [2024-07-14 08:51:03,453][train_inner][INFO] - {"epoch": 2, "update": 1.686, "loss": "0.763", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "111.605", "wer_total": "126.46", "n_error": "14.855", "ppl": "1.7", "accuracy": "88.253", "wer": "11.747", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "254200", "lr": "4.9645e-05", "gnorm": "2.211", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "720408"} [2024-07-14 08:57:07,945][train_inner][INFO] - {"epoch": 2, "update": 1.687, "loss": "0.729", "ntokens": "126.04", "acc_total": "126.04", "n_correct": "111.39", "wer_total": "126.04", "n_error": "14.65", "ppl": "1.66", "accuracy": "88.377", "wer": "11.623", "wps": "69.2", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "254400", "lr": "4.94965e-05", "gnorm": "2.128", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "720772"} [2024-07-14 09:03:12,143][train_inner][INFO] - {"epoch": 2, "update": 1.688, "loss": "0.708", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "112.47", "wer_total": "126.53", "n_error": "14.06", "ppl": "1.63", "accuracy": "88.888", "wer": "11.112", "wps": "69.5", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "254600", "lr": "4.93485e-05", "gnorm": "2.129", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "721136"} [2024-07-14 09:09:16,498][train_inner][INFO] - {"epoch": 2, "update": 1.69, "loss": "0.761", "ntokens": "127.54", "acc_total": "127.54", "n_correct": "112.495", "wer_total": "127.54", "n_error": "15.045", "ppl": "1.69", "accuracy": "88.204", "wer": "11.796", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "254800", "lr": "4.92009e-05", "gnorm": "2.169", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "721501"} [2024-07-14 09:15:20,626][train_inner][INFO] - {"epoch": 2, "update": 1.691, "loss": "0.733", "ntokens": "126.215", "acc_total": "126.215", "n_correct": "111.785", "wer_total": "126.215", "n_error": "14.43", "ppl": "1.66", "accuracy": "88.567", "wer": "11.433", "wps": "69.3", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "255000", "lr": "4.90537e-05", "gnorm": "2.158", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "721865"} [2024-07-14 09:15:20,627][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 09:57:05,676][valid][INFO] - {"epoch": 2, "valid_loss": "0.641", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2687", "valid_wer_total": "18.1585", "valid_n_error": "1.88962", "valid_ppl": "1.56", "valid_accuracy": "89.593", "valid_wer": "10.406", "valid_wps": "173.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "255000", "valid_best_accuracy": "89.593"} [2024-07-14 09:57:05,677][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 255000 updates [2024-07-14 09:57:05,677][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_255000.pt [2024-07-14 09:57:08,905][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_255000.pt [2024-07-14 09:57:14,347][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_255000.pt (epoch 2 @ 255000 updates, score 89.593) (writing took 8.670363477081992 seconds) [2024-07-14 10:03:18,647][train_inner][INFO] - {"epoch": 2, "update": 1.692, "loss": "0.723", "ntokens": "127.83", "acc_total": "127.83", "n_correct": "113.465", "wer_total": "127.83", "n_error": "14.36", "ppl": "1.65", "accuracy": "88.762", "wer": "11.234", "wps": "8.9", "ups": "0.07", "wpb": "127.8", "bsz": "8", "num_updates": "255200", "lr": "4.8907e-05", "gnorm": "2.173", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "724743"} [2024-07-14 10:09:23,040][train_inner][INFO] - {"epoch": 2, "update": 1.694, "loss": "0.699", "ntokens": "127.48", "acc_total": "127.48", "n_correct": "113.255", "wer_total": "127.48", "n_error": "14.22", "ppl": "1.62", "accuracy": "88.841", "wer": "11.155", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "255400", "lr": "4.87607e-05", "gnorm": "1.998", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "725107"} [2024-07-14 10:15:27,437][train_inner][INFO] - {"epoch": 2, "update": 1.695, "loss": "0.727", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "113.03", "wer_total": "127.405", "n_error": "14.375", "ppl": "1.66", "accuracy": "88.717", "wer": "11.283", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "255600", "lr": "4.86148e-05", "gnorm": "2.152", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "725471"} [2024-07-14 10:21:31,953][train_inner][INFO] - {"epoch": 2, "update": 1.696, "loss": "0.73", "ntokens": "126.37", "acc_total": "126.37", "n_correct": "112.145", "wer_total": "126.37", "n_error": "14.225", "ppl": "1.66", "accuracy": "88.743", "wer": "11.257", "wps": "69.3", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "255800", "lr": "4.84694e-05", "gnorm": "2.07", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "725836"} [2024-07-14 10:27:36,305][train_inner][INFO] - {"epoch": 2, "update": 1.698, "loss": "0.721", "ntokens": "128.08", "acc_total": "128.08", "n_correct": "113.595", "wer_total": "128.08", "n_error": "14.485", "ppl": "1.65", "accuracy": "88.691", "wer": "11.309", "wps": "70.3", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "256000", "lr": "4.83244e-05", "gnorm": "2.059", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "726200"} [2024-07-14 10:33:40,713][train_inner][INFO] - {"epoch": 2, "update": 1.699, "loss": "0.759", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "112.195", "wer_total": "126.76", "n_error": "14.565", "ppl": "1.69", "accuracy": "88.51", "wer": "11.49", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "256200", "lr": "4.81799e-05", "gnorm": "2.13", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "726565"} [2024-07-14 10:39:45,104][train_inner][INFO] - {"epoch": 2, "update": 1.7, "loss": "0.74", "ntokens": "126.42", "acc_total": "126.42", "n_correct": "111.91", "wer_total": "126.42", "n_error": "14.505", "ppl": "1.67", "accuracy": "88.522", "wer": "11.474", "wps": "69.4", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "256400", "lr": "4.80357e-05", "gnorm": "2.014", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "726929"} [2024-07-14 10:45:49,474][train_inner][INFO] - {"epoch": 2, "update": 1.702, "loss": "0.751", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "111.735", "wer_total": "126.255", "n_error": "14.52", "ppl": "1.68", "accuracy": "88.499", "wer": "11.501", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "256600", "lr": "4.78921e-05", "gnorm": "2.084", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "727294"} [2024-07-14 10:49:20,851][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-14 10:51:55,743][train_inner][INFO] - {"epoch": 2, "update": 1.703, "loss": "0.705", "ntokens": "127.315", "acc_total": "127.315", "n_correct": "113.4", "wer_total": "127.315", "n_error": "13.915", "ppl": "1.63", "accuracy": "89.07", "wer": "10.93", "wps": "69.5", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "256800", "lr": "4.77488e-05", "gnorm": "2.048", "loss_scale": "2048", "train_wall": "366", "gb_free": "6.5", "wall": "727660"} [2024-07-14 10:58:00,100][train_inner][INFO] - {"epoch": 2, "update": 1.704, "loss": "0.777", "ntokens": "126.845", "acc_total": "126.845", "n_correct": "112.005", "wer_total": "126.845", "n_error": "14.84", "ppl": "1.71", "accuracy": "88.301", "wer": "11.699", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "257000", "lr": "4.7606e-05", "gnorm": "2.182", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "728024"} [2024-07-14 11:04:04,571][train_inner][INFO] - {"epoch": 2, "update": 1.706, "loss": "0.703", "ntokens": "128.185", "acc_total": "128.185", "n_correct": "114.09", "wer_total": "128.185", "n_error": "14.09", "ppl": "1.63", "accuracy": "89.004", "wer": "10.992", "wps": "70.3", "ups": "0.55", "wpb": "128.2", "bsz": "8", "num_updates": "257200", "lr": "4.74636e-05", "gnorm": "2.076", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "728389"} [2024-07-14 11:10:09,063][train_inner][INFO] - {"epoch": 2, "update": 1.707, "loss": "0.753", "ntokens": "126.28", "acc_total": "126.28", "n_correct": "111.43", "wer_total": "126.28", "n_error": "14.85", "ppl": "1.69", "accuracy": "88.24", "wer": "11.76", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "257400", "lr": "4.73216e-05", "gnorm": "2.202", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "728753"} [2024-07-14 11:13:11,247][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 11:54:58,009][valid][INFO] - {"epoch": 2, "valid_loss": "0.642", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2561", "valid_wer_total": "18.1585", "valid_n_error": "1.90225", "valid_ppl": "1.56", "valid_accuracy": "89.523", "valid_wer": "10.476", "valid_wps": "173.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "257500", "valid_best_accuracy": "89.593"} [2024-07-14 11:54:58,010][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 257500 updates [2024-07-14 11:54:58,010][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_257500.pt [2024-07-14 11:55:01,242][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_257500.pt [2024-07-14 11:55:03,422][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_257500.pt (epoch 2 @ 257500 updates, score 89.523) (writing took 5.411920294049196 seconds) [2024-07-14 11:58:05,320][train_inner][INFO] - {"epoch": 2, "update": 1.708, "loss": "0.722", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "112.95", "wer_total": "127.1", "n_error": "14.15", "ppl": "1.65", "accuracy": "88.867", "wer": "11.133", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "257600", "lr": "4.718e-05", "gnorm": "2.143", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "731629"} [2024-07-14 12:04:10,013][train_inner][INFO] - {"epoch": 2, "update": 1.71, "loss": "0.733", "ntokens": "128.435", "acc_total": "128.435", "n_correct": "114.01", "wer_total": "128.435", "n_error": "14.425", "ppl": "1.66", "accuracy": "88.769", "wer": "11.231", "wps": "70.4", "ups": "0.55", "wpb": "128.4", "bsz": "8", "num_updates": "257800", "lr": "4.70389e-05", "gnorm": "2.096", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "731994"} [2024-07-14 12:04:37,273][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-14 12:10:16,232][train_inner][INFO] - {"epoch": 2, "update": 1.711, "loss": "0.777", "ntokens": "126.16", "acc_total": "126.16", "n_correct": "111.45", "wer_total": "126.16", "n_error": "14.705", "ppl": "1.71", "accuracy": "88.34", "wer": "11.656", "wps": "68.9", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "258000", "lr": "4.68982e-05", "gnorm": "2.194", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "732360"} [2024-07-14 12:16:20,755][train_inner][INFO] - {"epoch": 2, "update": 1.712, "loss": "0.765", "ntokens": "126.48", "acc_total": "126.48", "n_correct": "111.575", "wer_total": "126.48", "n_error": "14.9", "ppl": "1.7", "accuracy": "88.216", "wer": "11.781", "wps": "69.4", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "258200", "lr": "4.67579e-05", "gnorm": "2.169", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "732725"} [2024-07-14 12:22:25,400][train_inner][INFO] - {"epoch": 2, "update": 1.714, "loss": "0.743", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "112.69", "wer_total": "127.055", "n_error": "14.365", "ppl": "1.67", "accuracy": "88.694", "wer": "11.306", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "258400", "lr": "4.66181e-05", "gnorm": "2.117", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "733089"} [2024-07-14 12:28:30,003][train_inner][INFO] - {"epoch": 2, "update": 1.715, "loss": "0.718", "ntokens": "127.18", "acc_total": "127.18", "n_correct": "113.05", "wer_total": "127.18", "n_error": "14.13", "ppl": "1.64", "accuracy": "88.89", "wer": "11.11", "wps": "69.8", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "258600", "lr": "4.64786e-05", "gnorm": "2.064", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "733454"} [2024-07-14 12:34:34,514][train_inner][INFO] - {"epoch": 2, "update": 1.716, "loss": "0.722", "ntokens": "127.27", "acc_total": "127.27", "n_correct": "113.045", "wer_total": "127.27", "n_error": "14.225", "ppl": "1.65", "accuracy": "88.823", "wer": "11.177", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "258800", "lr": "4.63396e-05", "gnorm": "2.028", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "733819"} [2024-07-14 12:40:39,354][train_inner][INFO] - {"epoch": 2, "update": 1.718, "loss": "0.739", "ntokens": "126.755", "acc_total": "126.755", "n_correct": "112.41", "wer_total": "126.755", "n_error": "14.34", "ppl": "1.67", "accuracy": "88.683", "wer": "11.313", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "259000", "lr": "4.6201e-05", "gnorm": "2.061", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "734183"} [2024-07-14 12:46:44,273][train_inner][INFO] - {"epoch": 2, "update": 1.719, "loss": "0.678", "ntokens": "127.995", "acc_total": "127.995", "n_correct": "114.48", "wer_total": "127.995", "n_error": "13.515", "ppl": "1.6", "accuracy": "89.441", "wer": "10.559", "wps": "70.2", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "259200", "lr": "4.60628e-05", "gnorm": "2.038", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "734548"} [2024-07-14 12:52:49,009][train_inner][INFO] - {"epoch": 2, "update": 1.72, "loss": "0.699", "ntokens": "126.965", "acc_total": "126.965", "n_correct": "113.13", "wer_total": "126.965", "n_error": "13.835", "ppl": "1.62", "accuracy": "89.103", "wer": "10.897", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "259400", "lr": "4.5925e-05", "gnorm": "2.028", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "734913"} [2024-07-14 12:58:53,825][train_inner][INFO] - {"epoch": 2, "update": 1.722, "loss": "0.729", "ntokens": "127.65", "acc_total": "127.65", "n_correct": "113.38", "wer_total": "127.65", "n_error": "14.27", "ppl": "1.66", "accuracy": "88.821", "wer": "11.179", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "259600", "lr": "4.57876e-05", "gnorm": "2.139", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "735278"} [2024-07-14 13:04:58,749][train_inner][INFO] - {"epoch": 2, "update": 1.723, "loss": "0.714", "ntokens": "126.165", "acc_total": "126.165", "n_correct": "112.235", "wer_total": "126.165", "n_error": "13.93", "ppl": "1.64", "accuracy": "88.959", "wer": "11.041", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "259800", "lr": "4.56507e-05", "gnorm": "2.126", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "735643"} [2024-07-14 13:11:03,236][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "0.741", "ntokens": "126.615", "acc_total": "126.615", "n_correct": "112.3", "wer_total": "126.615", "n_error": "14.31", "ppl": "1.67", "accuracy": "88.694", "wer": "11.302", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "260000", "lr": "4.55141e-05", "gnorm": "2.244", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "736007"} [2024-07-14 13:11:03,236][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 13:52:50,426][valid][INFO] - {"epoch": 2, "valid_loss": "0.635", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2766", "valid_wer_total": "18.1585", "valid_n_error": "1.88166", "valid_ppl": "1.55", "valid_accuracy": "89.636", "valid_wer": "10.362", "valid_wps": "173.8", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "260000", "valid_best_accuracy": "89.636"} [2024-07-14 13:52:50,427][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 260000 updates [2024-07-14 13:52:50,427][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_260000.pt [2024-07-14 13:52:53,624][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_260000.pt [2024-07-14 13:52:57,804][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_260000.pt (epoch 2 @ 260000 updates, score 89.636) (writing took 7.376555465045385 seconds) [2024-07-14 13:59:02,411][train_inner][INFO] - {"epoch": 2, "update": 1.726, "loss": "0.74", "ntokens": "126.28", "acc_total": "126.28", "n_correct": "111.95", "wer_total": "126.28", "n_error": "14.33", "ppl": "1.67", "accuracy": "88.652", "wer": "11.348", "wps": "8.8", "ups": "0.07", "wpb": "126.3", "bsz": "8", "num_updates": "260200", "lr": "4.5378e-05", "gnorm": "2.043", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "738886"} [2024-07-14 14:05:07,444][train_inner][INFO] - {"epoch": 2, "update": 1.727, "loss": "0.663", "ntokens": "127.67", "acc_total": "127.67", "n_correct": "114.35", "wer_total": "127.67", "n_error": "13.32", "ppl": "1.58", "accuracy": "89.567", "wer": "10.433", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "260400", "lr": "4.52422e-05", "gnorm": "1.925", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "739251"} [2024-07-14 14:11:12,377][train_inner][INFO] - {"epoch": 2, "update": 1.728, "loss": "0.728", "ntokens": "127.18", "acc_total": "127.18", "n_correct": "112.93", "wer_total": "127.18", "n_error": "14.245", "ppl": "1.66", "accuracy": "88.795", "wer": "11.201", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "260600", "lr": "4.51069e-05", "gnorm": "2.151", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "739616"} [2024-07-14 14:17:17,289][train_inner][INFO] - {"epoch": 2, "update": 1.729, "loss": "0.73", "ntokens": "127.15", "acc_total": "127.15", "n_correct": "112.765", "wer_total": "127.15", "n_error": "14.385", "ppl": "1.66", "accuracy": "88.687", "wer": "11.313", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "260800", "lr": "4.4972e-05", "gnorm": "2.043", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "739981"} [2024-07-14 14:23:22,030][train_inner][INFO] - {"epoch": 2, "update": 1.731, "loss": "0.72", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "112.91", "wer_total": "127.095", "n_error": "14.175", "ppl": "1.65", "accuracy": "88.839", "wer": "11.153", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "261000", "lr": "4.48374e-05", "gnorm": "2.153", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "740346"} [2024-07-14 14:26:06,094][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-14 14:29:28,729][train_inner][INFO] - {"epoch": 2, "update": 1.732, "loss": "0.739", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "112.625", "wer_total": "127.145", "n_error": "14.51", "ppl": "1.67", "accuracy": "88.58", "wer": "11.412", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "261200", "lr": "4.47033e-05", "gnorm": "2.18", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "740713"} [2024-07-14 14:35:33,606][train_inner][INFO] - {"epoch": 2, "update": 1.733, "loss": "0.724", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "112.525", "wer_total": "126.785", "n_error": "14.26", "ppl": "1.65", "accuracy": "88.753", "wer": "11.247", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "261400", "lr": "4.45696e-05", "gnorm": "2.097", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "741078"} [2024-07-14 14:41:38,538][train_inner][INFO] - {"epoch": 2, "update": 1.735, "loss": "0.735", "ntokens": "126.75", "acc_total": "126.75", "n_correct": "112.24", "wer_total": "126.75", "n_error": "14.51", "ppl": "1.66", "accuracy": "88.552", "wer": "11.448", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "261600", "lr": "4.44363e-05", "gnorm": "2.085", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "741443"} [2024-07-14 14:47:43,422][train_inner][INFO] - {"epoch": 2, "update": 1.736, "loss": "0.724", "ntokens": "126.565", "acc_total": "126.565", "n_correct": "111.905", "wer_total": "126.565", "n_error": "14.66", "ppl": "1.65", "accuracy": "88.417", "wer": "11.583", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "261800", "lr": "4.43034e-05", "gnorm": "2.029", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "741807"} [2024-07-14 14:53:48,506][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "0.69", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "113.34", "wer_total": "126.9", "n_error": "13.555", "ppl": "1.61", "accuracy": "89.314", "wer": "10.682", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "262000", "lr": "4.41708e-05", "gnorm": "2.048", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "742173"} [2024-07-14 14:59:53,694][train_inner][INFO] - {"epoch": 2, "update": 1.739, "loss": "0.764", "ntokens": "126.055", "acc_total": "126.055", "n_correct": "111.47", "wer_total": "126.055", "n_error": "14.585", "ppl": "1.7", "accuracy": "88.43", "wer": "11.57", "wps": "69", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "262200", "lr": "4.40387e-05", "gnorm": "2.161", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "742538"} [2024-07-14 15:05:59,014][train_inner][INFO] - {"epoch": 2, "update": 1.74, "loss": "0.741", "ntokens": "126.385", "acc_total": "126.385", "n_correct": "111.89", "wer_total": "126.385", "n_error": "14.495", "ppl": "1.67", "accuracy": "88.531", "wer": "11.469", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "262400", "lr": "4.3907e-05", "gnorm": "2.014", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "742903"} [2024-07-14 15:09:01,792][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 15:50:54,766][valid][INFO] - {"epoch": 2, "valid_loss": "0.633", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2915", "valid_wer_total": "18.1585", "valid_n_error": "1.86678", "valid_ppl": "1.55", "valid_accuracy": "89.719", "valid_wer": "10.28", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "262500", "valid_best_accuracy": "89.719"} [2024-07-14 15:50:54,767][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 262500 updates [2024-07-14 15:50:54,767][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_262500.pt [2024-07-14 15:50:58,043][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_262500.pt [2024-07-14 15:51:02,220][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_262500.pt (epoch 2 @ 262500 updates, score 89.719) (writing took 7.453748974017799 seconds) [2024-07-14 15:54:04,435][train_inner][INFO] - {"epoch": 2, "update": 1.741, "loss": "0.695", "ntokens": "126.75", "acc_total": "126.75", "n_correct": "113.14", "wer_total": "126.75", "n_error": "13.6", "ppl": "1.62", "accuracy": "89.262", "wer": "10.73", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "262600", "lr": "4.37757e-05", "gnorm": "1.988", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "745788"} [2024-07-14 16:00:09,620][train_inner][INFO] - {"epoch": 2, "update": 1.743, "loss": "0.724", "ntokens": "125.84", "acc_total": "125.84", "n_correct": "111.56", "wer_total": "125.84", "n_error": "14.27", "ppl": "1.65", "accuracy": "88.652", "wer": "11.34", "wps": "68.9", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "262800", "lr": "4.36447e-05", "gnorm": "2.09", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "746154"} [2024-07-14 16:06:15,123][train_inner][INFO] - {"epoch": 2, "update": 1.744, "loss": "0.725", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "112.1", "wer_total": "126.405", "n_error": "14.3", "ppl": "1.65", "accuracy": "88.683", "wer": "11.313", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "263000", "lr": "4.35142e-05", "gnorm": "2.13", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "746519"} [2024-07-14 16:12:20,576][train_inner][INFO] - {"epoch": 2, "update": 1.745, "loss": "0.723", "ntokens": "127.185", "acc_total": "127.185", "n_correct": "112.645", "wer_total": "127.185", "n_error": "14.54", "ppl": "1.65", "accuracy": "88.568", "wer": "11.432", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "263200", "lr": "4.3384e-05", "gnorm": "2.086", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "746885"} [2024-07-14 16:18:26,258][train_inner][INFO] - {"epoch": 2, "update": 1.747, "loss": "0.69", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "114.235", "wer_total": "127.695", "n_error": "13.46", "ppl": "1.61", "accuracy": "89.459", "wer": "10.541", "wps": "69.8", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "263400", "lr": "4.32542e-05", "gnorm": "2.083", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "747250"} [2024-07-14 16:21:49,040][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-14 16:24:33,372][train_inner][INFO] - {"epoch": 2, "update": 1.748, "loss": "0.709", "ntokens": "126.67", "acc_total": "126.67", "n_correct": "113.125", "wer_total": "126.67", "n_error": "13.545", "ppl": "1.64", "accuracy": "89.307", "wer": "10.693", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "263600", "lr": "4.31248e-05", "gnorm": "1.958", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "747617"} [2024-07-14 16:30:38,585][train_inner][INFO] - {"epoch": 2, "update": 1.749, "loss": "0.732", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "112.26", "wer_total": "126.6", "n_error": "14.335", "ppl": "1.66", "accuracy": "88.673", "wer": "11.323", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "263800", "lr": "4.29958e-05", "gnorm": "2.146", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "747983"} [2024-07-14 16:36:43,345][train_inner][INFO] - {"epoch": 2, "update": 1.751, "loss": "0.737", "ntokens": "127.685", "acc_total": "127.685", "n_correct": "113.255", "wer_total": "127.685", "n_error": "14.43", "ppl": "1.67", "accuracy": "88.699", "wer": "11.301", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "264000", "lr": "4.28672e-05", "gnorm": "2.197", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "748347"} [2024-07-14 16:42:50,826][train_inner][INFO] - {"epoch": 2, "update": 1.752, "loss": "0.702", "ntokens": "127.735", "acc_total": "127.735", "n_correct": "113.78", "wer_total": "127.735", "n_error": "13.955", "ppl": "1.63", "accuracy": "89.075", "wer": "10.925", "wps": "69.5", "ups": "0.54", "wpb": "127.7", "bsz": "8", "num_updates": "264200", "lr": "4.2739e-05", "gnorm": "2.119", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "748715"} [2024-07-14 16:48:55,543][train_inner][INFO] - {"epoch": 2, "update": 1.753, "loss": "0.708", "ntokens": "128.575", "acc_total": "128.575", "n_correct": "114.505", "wer_total": "128.575", "n_error": "14.07", "ppl": "1.63", "accuracy": "89.057", "wer": "10.943", "wps": "70.5", "ups": "0.55", "wpb": "128.6", "bsz": "8", "num_updates": "264400", "lr": "4.26112e-05", "gnorm": "2.045", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "749080"} [2024-07-14 16:55:03,071][train_inner][INFO] - {"epoch": 2, "update": 1.755, "loss": "0.727", "ntokens": "126.855", "acc_total": "126.855", "n_correct": "112.73", "wer_total": "126.855", "n_error": "14.125", "ppl": "1.65", "accuracy": "88.865", "wer": "11.135", "wps": "69", "ups": "0.54", "wpb": "126.9", "bsz": "8", "num_updates": "264600", "lr": "4.24837e-05", "gnorm": "2.106", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "749447"} [2024-07-14 17:01:06,284][train_inner][INFO] - {"epoch": 2, "update": 1.756, "loss": "0.731", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "112.47", "wer_total": "126.705", "n_error": "14.235", "ppl": "1.66", "accuracy": "88.765", "wer": "11.235", "wps": "69.8", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "264800", "lr": "4.23566e-05", "gnorm": "2.105", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "749810"} [2024-07-14 17:07:09,665][train_inner][INFO] - {"epoch": 2, "update": 1.757, "loss": "0.751", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "111.765", "wer_total": "126.76", "n_error": "14.99", "ppl": "1.68", "accuracy": "88.171", "wer": "11.825", "wps": "69.8", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "265000", "lr": "4.22299e-05", "gnorm": "2.143", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "750174"} [2024-07-14 17:07:09,666][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 17:48:48,376][valid][INFO] - {"epoch": 2, "valid_loss": "0.632", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2944", "valid_wer_total": "18.1585", "valid_n_error": "1.86403", "valid_ppl": "1.55", "valid_accuracy": "89.734", "valid_wer": "10.265", "valid_wps": "174.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "265000", "valid_best_accuracy": "89.734"} [2024-07-14 17:48:48,377][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 265000 updates [2024-07-14 17:48:48,377][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_265000.pt [2024-07-14 17:48:51,597][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_265000.pt [2024-07-14 17:48:55,937][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_265000.pt (epoch 2 @ 265000 updates, score 89.734) (writing took 7.560451127006672 seconds) [2024-07-14 17:54:59,253][train_inner][INFO] - {"epoch": 2, "update": 1.759, "loss": "0.717", "ntokens": "126.69", "acc_total": "126.69", "n_correct": "112.72", "wer_total": "126.69", "n_error": "13.965", "ppl": "1.64", "accuracy": "88.973", "wer": "11.023", "wps": "8.8", "ups": "0.07", "wpb": "126.7", "bsz": "8", "num_updates": "265200", "lr": "4.21036e-05", "gnorm": "2.063", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "753043"} [2024-07-14 18:01:02,747][train_inner][INFO] - {"epoch": 2, "update": 1.76, "loss": "0.699", "ntokens": "127.335", "acc_total": "127.335", "n_correct": "113.655", "wer_total": "127.335", "n_error": "13.68", "ppl": "1.62", "accuracy": "89.257", "wer": "10.743", "wps": "70.1", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "265400", "lr": "4.19777e-05", "gnorm": "2.173", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "753407"} [2024-07-14 18:07:06,047][train_inner][INFO] - {"epoch": 2, "update": 1.761, "loss": "0.714", "ntokens": "126.02", "acc_total": "126.02", "n_correct": "112.045", "wer_total": "126.02", "n_error": "13.975", "ppl": "1.64", "accuracy": "88.91", "wer": "11.09", "wps": "69.4", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "265600", "lr": "4.18521e-05", "gnorm": "2.028", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "753770"} [2024-07-14 18:13:09,404][train_inner][INFO] - {"epoch": 2, "update": 1.763, "loss": "0.751", "ntokens": "125.935", "acc_total": "125.935", "n_correct": "111.34", "wer_total": "125.935", "n_error": "14.595", "ppl": "1.68", "accuracy": "88.411", "wer": "11.589", "wps": "69.3", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "265800", "lr": "4.17269e-05", "gnorm": "2.16", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "754133"} [2024-07-14 18:19:12,625][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "0.72", "ntokens": "127.065", "acc_total": "127.065", "n_correct": "112.98", "wer_total": "127.065", "n_error": "14.085", "ppl": "1.65", "accuracy": "88.915", "wer": "11.085", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "266000", "lr": "4.16021e-05", "gnorm": "2.084", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "754497"} [2024-07-14 18:24:16,145][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-14 18:25:17,953][train_inner][INFO] - {"epoch": 2, "update": 1.765, "loss": "0.753", "ntokens": "126.62", "acc_total": "126.62", "n_correct": "112.08", "wer_total": "126.62", "n_error": "14.535", "ppl": "1.69", "accuracy": "88.517", "wer": "11.479", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "266200", "lr": "4.14776e-05", "gnorm": "2.121", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "754862"} [2024-07-14 18:31:21,308][train_inner][INFO] - {"epoch": 2, "update": 1.767, "loss": "0.671", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "114.1", "wer_total": "127.405", "n_error": "13.305", "ppl": "1.59", "accuracy": "89.557", "wer": "10.443", "wps": "70.1", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "266400", "lr": "4.13536e-05", "gnorm": "1.956", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "755225"} [2024-07-14 18:37:24,762][train_inner][INFO] - {"epoch": 2, "update": 1.768, "loss": "0.759", "ntokens": "127.035", "acc_total": "127.035", "n_correct": "112.365", "wer_total": "127.035", "n_error": "14.67", "ppl": "1.69", "accuracy": "88.452", "wer": "11.548", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "266600", "lr": "4.12299e-05", "gnorm": "2.122", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "755589"} [2024-07-14 18:43:28,029][train_inner][INFO] - {"epoch": 2, "update": 1.769, "loss": "0.708", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "113.045", "wer_total": "126.9", "n_error": "13.85", "ppl": "1.63", "accuracy": "89.082", "wer": "10.914", "wps": "69.9", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "266800", "lr": "4.11065e-05", "gnorm": "2.04", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "755952"} [2024-07-14 18:49:31,615][train_inner][INFO] - {"epoch": 2, "update": 1.771, "loss": "0.735", "ntokens": "127.375", "acc_total": "127.375", "n_correct": "112.655", "wer_total": "127.375", "n_error": "14.715", "ppl": "1.66", "accuracy": "88.444", "wer": "11.553", "wps": "70.1", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "267000", "lr": "4.09836e-05", "gnorm": "2.051", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "756316"} [2024-07-14 18:55:34,857][train_inner][INFO] - {"epoch": 2, "update": 1.772, "loss": "0.74", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "112.385", "wer_total": "126.93", "n_error": "14.545", "ppl": "1.67", "accuracy": "88.541", "wer": "11.459", "wps": "69.9", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "267200", "lr": "4.0861e-05", "gnorm": "2.1", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "756679"} [2024-07-14 19:01:38,463][train_inner][INFO] - {"epoch": 2, "update": 1.773, "loss": "0.7", "ntokens": "126.79", "acc_total": "126.79", "n_correct": "112.72", "wer_total": "126.79", "n_error": "14.07", "ppl": "1.62", "accuracy": "88.903", "wer": "11.097", "wps": "69.7", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "267400", "lr": "4.07388e-05", "gnorm": "2.019", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "757043"} [2024-07-14 19:04:40,202][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 19:46:18,378][valid][INFO] - {"epoch": 2, "valid_loss": "0.63", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2892", "valid_wer_total": "18.1585", "valid_n_error": "1.86911", "valid_ppl": "1.55", "valid_accuracy": "89.706", "valid_wer": "10.293", "valid_wps": "174.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "267500", "valid_best_accuracy": "89.734"} [2024-07-14 19:46:18,379][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 267500 updates [2024-07-14 19:46:18,379][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_267500.pt [2024-07-14 19:46:21,636][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_267500.pt [2024-07-14 19:46:23,779][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_267500.pt (epoch 2 @ 267500 updates, score 89.706) (writing took 5.400172743946314 seconds) [2024-07-14 19:49:25,389][train_inner][INFO] - {"epoch": 2, "update": 1.775, "loss": "0.725", "ntokens": "126.5", "acc_total": "126.5", "n_correct": "112.37", "wer_total": "126.5", "n_error": "14.13", "ppl": "1.65", "accuracy": "88.83", "wer": "11.17", "wps": "8.8", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "267600", "lr": "4.06169e-05", "gnorm": "2.165", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "759909"} [2024-07-14 19:55:28,765][train_inner][INFO] - {"epoch": 2, "update": 1.776, "loss": "0.74", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "112.305", "wer_total": "127.17", "n_error": "14.865", "ppl": "1.67", "accuracy": "88.311", "wer": "11.689", "wps": "70", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "267800", "lr": "4.04954e-05", "gnorm": "2.075", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "760273"} [2024-07-14 20:01:32,252][train_inner][INFO] - {"epoch": 2, "update": 1.777, "loss": "0.692", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "113.075", "wer_total": "126.945", "n_error": "13.865", "ppl": "1.62", "accuracy": "89.074", "wer": "10.922", "wps": "69.8", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "268000", "lr": "4.03743e-05", "gnorm": "2.089", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "760636"} [2024-07-14 20:07:35,877][train_inner][INFO] - {"epoch": 2, "update": 1.779, "loss": "0.701", "ntokens": "127.05", "acc_total": "127.05", "n_correct": "112.955", "wer_total": "127.05", "n_error": "14.095", "ppl": "1.63", "accuracy": "88.906", "wer": "11.094", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "268200", "lr": "4.02535e-05", "gnorm": "2.091", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "761000"} [2024-07-14 20:08:44,900][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-14 20:13:41,163][train_inner][INFO] - {"epoch": 2, "update": 1.78, "loss": "0.715", "ntokens": "126.995", "acc_total": "126.995", "n_correct": "112.99", "wer_total": "126.995", "n_error": "14", "ppl": "1.64", "accuracy": "88.972", "wer": "11.024", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "268400", "lr": "4.01331e-05", "gnorm": "2.051", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "761365"} [2024-07-14 20:19:44,841][train_inner][INFO] - {"epoch": 2, "update": 1.781, "loss": "0.734", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "112.72", "wer_total": "127.145", "n_error": "14.425", "ppl": "1.66", "accuracy": "88.655", "wer": "11.345", "wps": "69.9", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "268600", "lr": "4.00131e-05", "gnorm": "2.116", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "761729"} [2024-07-14 20:25:48,511][train_inner][INFO] - {"epoch": 2, "update": 1.783, "loss": "0.748", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "112.025", "wer_total": "126.6", "n_error": "14.575", "ppl": "1.68", "accuracy": "88.487", "wer": "11.513", "wps": "69.6", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "268800", "lr": "3.98934e-05", "gnorm": "2.113", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "762093"} [2024-07-14 20:31:52,182][train_inner][INFO] - {"epoch": 2, "update": 1.784, "loss": "0.686", "ntokens": "125.75", "acc_total": "125.75", "n_correct": "112.12", "wer_total": "125.75", "n_error": "13.63", "ppl": "1.61", "accuracy": "89.161", "wer": "10.839", "wps": "69.2", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "269000", "lr": "3.9774e-05", "gnorm": "2.102", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "762456"} [2024-07-14 20:37:56,114][train_inner][INFO] - {"epoch": 2, "update": 1.785, "loss": "0.686", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "112.185", "wer_total": "125.9", "n_error": "13.715", "ppl": "1.61", "accuracy": "89.106", "wer": "10.894", "wps": "69.2", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "269200", "lr": "3.96551e-05", "gnorm": "2.093", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "762820"} [2024-07-14 20:43:59,693][train_inner][INFO] - {"epoch": 2, "update": 1.787, "loss": "0.715", "ntokens": "126.63", "acc_total": "126.63", "n_correct": "112.48", "wer_total": "126.63", "n_error": "14.15", "ppl": "1.64", "accuracy": "88.826", "wer": "11.174", "wps": "69.7", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "269400", "lr": "3.95364e-05", "gnorm": "2.081", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "763184"} [2024-07-14 20:50:03,349][train_inner][INFO] - {"epoch": 2, "update": 1.788, "loss": "0.677", "ntokens": "126.57", "acc_total": "126.57", "n_correct": "112.805", "wer_total": "126.57", "n_error": "13.76", "ppl": "1.6", "accuracy": "89.125", "wer": "10.871", "wps": "69.6", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "269600", "lr": "3.94182e-05", "gnorm": "2.102", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "763547"} [2024-07-14 20:56:06,874][train_inner][INFO] - {"epoch": 2, "update": 1.789, "loss": "0.726", "ntokens": "127.23", "acc_total": "127.23", "n_correct": "112.99", "wer_total": "127.23", "n_error": "14.24", "ppl": "1.65", "accuracy": "88.808", "wer": "11.192", "wps": "70", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "269800", "lr": "3.93003e-05", "gnorm": "1.977", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "763911"} [2024-07-14 21:02:10,509][train_inner][INFO] - {"epoch": 2, "update": 1.791, "loss": "0.752", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "112.745", "wer_total": "127.26", "n_error": "14.515", "ppl": "1.68", "accuracy": "88.594", "wer": "11.406", "wps": "70", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "270000", "lr": "3.91827e-05", "gnorm": "2.268", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "764275"} [2024-07-14 21:02:10,510][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 21:43:51,589][valid][INFO] - {"epoch": 2, "valid_loss": "0.625", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3079", "valid_wer_total": "18.1585", "valid_n_error": "1.85031", "valid_ppl": "1.54", "valid_accuracy": "89.809", "valid_wer": "10.19", "valid_wps": "174.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "270000", "valid_best_accuracy": "89.809"} [2024-07-14 21:43:51,590][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 270000 updates [2024-07-14 21:43:51,590][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_270000.pt [2024-07-14 21:43:54,818][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_270000.pt [2024-07-14 21:43:58,976][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_270000.pt (epoch 2 @ 270000 updates, score 89.809) (writing took 7.38581104693003 seconds) [2024-07-14 21:50:02,883][train_inner][INFO] - {"epoch": 2, "update": 1.792, "loss": "0.723", "ntokens": "126.985", "acc_total": "126.985", "n_correct": "112.38", "wer_total": "126.985", "n_error": "14.6", "ppl": "1.65", "accuracy": "88.499", "wer": "11.497", "wps": "8.8", "ups": "0.07", "wpb": "127", "bsz": "8", "num_updates": "270200", "lr": "3.90655e-05", "gnorm": "2.159", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "767147"} [2024-07-14 21:56:06,704][train_inner][INFO] - {"epoch": 2, "update": 1.793, "loss": "0.714", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "113.205", "wer_total": "126.87", "n_error": "13.665", "ppl": "1.64", "accuracy": "89.229", "wer": "10.771", "wps": "69.7", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "270400", "lr": "3.89487e-05", "gnorm": "1.931", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "767511"} [2024-07-14 22:02:10,861][train_inner][INFO] - {"epoch": 2, "update": 1.794, "loss": "0.7", "ntokens": "127.81", "acc_total": "127.81", "n_correct": "113.89", "wer_total": "127.81", "n_error": "13.92", "ppl": "1.62", "accuracy": "89.109", "wer": "10.891", "wps": "70.2", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "270600", "lr": "3.88321e-05", "gnorm": "1.99", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "767875"} [2024-07-14 22:08:14,569][train_inner][INFO] - {"epoch": 2, "update": 1.796, "loss": "0.712", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "112.45", "wer_total": "126.295", "n_error": "13.845", "ppl": "1.64", "accuracy": "89.038", "wer": "10.962", "wps": "69.4", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "270800", "lr": "3.8716e-05", "gnorm": "2.081", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "768239"} [2024-07-14 22:14:18,187][train_inner][INFO] - {"epoch": 2, "update": 1.797, "loss": "0.686", "ntokens": "125.87", "acc_total": "125.87", "n_correct": "112.165", "wer_total": "125.87", "n_error": "13.705", "ppl": "1.61", "accuracy": "89.112", "wer": "10.888", "wps": "69.2", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "271000", "lr": "3.86002e-05", "gnorm": "2.121", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "768602"} [2024-07-14 22:20:21,879][train_inner][INFO] - {"epoch": 2, "update": 1.798, "loss": "0.711", "ntokens": "126.55", "acc_total": "126.55", "n_correct": "112.545", "wer_total": "126.55", "n_error": "14.005", "ppl": "1.64", "accuracy": "88.933", "wer": "11.067", "wps": "69.6", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "271200", "lr": "3.84847e-05", "gnorm": "2.01", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "768966"} [2024-07-14 22:26:25,584][train_inner][INFO] - {"epoch": 2, "update": 1.8, "loss": "0.734", "ntokens": "127.215", "acc_total": "127.215", "n_correct": "113.035", "wer_total": "127.215", "n_error": "14.18", "ppl": "1.66", "accuracy": "88.854", "wer": "11.146", "wps": "70", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "271400", "lr": "3.83696e-05", "gnorm": "2.112", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "769330"} [2024-07-14 22:32:29,364][train_inner][INFO] - {"epoch": 2, "update": 1.801, "loss": "0.71", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "112.64", "wer_total": "126.705", "n_error": "14.06", "ppl": "1.64", "accuracy": "88.899", "wer": "11.097", "wps": "69.7", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "271600", "lr": "3.82548e-05", "gnorm": "2.037", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "769693"} [2024-07-14 22:38:33,106][train_inner][INFO] - {"epoch": 2, "update": 1.802, "loss": "0.695", "ntokens": "126.145", "acc_total": "126.145", "n_correct": "112.36", "wer_total": "126.145", "n_error": "13.785", "ppl": "1.62", "accuracy": "89.072", "wer": "10.928", "wps": "69.4", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "271800", "lr": "3.81404e-05", "gnorm": "1.935", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "770057"} [2024-07-14 22:44:36,884][train_inner][INFO] - {"epoch": 2, "update": 1.804, "loss": "0.726", "ntokens": "127.71", "acc_total": "127.71", "n_correct": "113.355", "wer_total": "127.71", "n_error": "14.355", "ppl": "1.65", "accuracy": "88.76", "wer": "11.24", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "272000", "lr": "3.80263e-05", "gnorm": "2.047", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "770421"} [2024-07-14 22:50:40,520][train_inner][INFO] - {"epoch": 2, "update": 1.805, "loss": "0.699", "ntokens": "126.145", "acc_total": "126.145", "n_correct": "112.27", "wer_total": "126.145", "n_error": "13.875", "ppl": "1.62", "accuracy": "89.001", "wer": "10.999", "wps": "69.4", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "272200", "lr": "3.79126e-05", "gnorm": "1.958", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "770785"} [2024-07-14 22:55:22,257][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2048.0 [2024-07-14 22:56:46,028][train_inner][INFO] - {"epoch": 2, "update": 1.806, "loss": "0.697", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "113.22", "wer_total": "126.9", "n_error": "13.68", "ppl": "1.62", "accuracy": "89.22", "wer": "10.78", "wps": "69.4", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "272400", "lr": "3.77992e-05", "gnorm": "2", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "771150"} [2024-07-14 22:59:47,757][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-14 23:41:26,490][valid][INFO] - {"epoch": 2, "valid_loss": "0.628", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.294", "valid_wer_total": "18.1585", "valid_n_error": "1.86436", "valid_ppl": "1.55", "valid_accuracy": "89.732", "valid_wer": "10.267", "valid_wps": "174.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "272500", "valid_best_accuracy": "89.809"} [2024-07-14 23:41:26,490][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 272500 updates [2024-07-14 23:41:26,491][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_272500.pt [2024-07-14 23:41:29,735][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_272500.pt [2024-07-14 23:41:31,892][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_272500.pt (epoch 2 @ 272500 updates, score 89.732) (writing took 5.401293240953237 seconds) [2024-07-14 23:44:33,415][train_inner][INFO] - {"epoch": 2, "update": 1.808, "loss": "0.716", "ntokens": "126.11", "acc_total": "126.11", "n_correct": "112.02", "wer_total": "126.11", "n_error": "14.085", "ppl": "1.64", "accuracy": "88.827", "wer": "11.169", "wps": "8.8", "ups": "0.07", "wpb": "126.1", "bsz": "8", "num_updates": "272600", "lr": "3.76861e-05", "gnorm": "2.1", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "774017"} [2024-07-14 23:49:33,339][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-14 23:50:38,818][train_inner][INFO] - {"epoch": 2, "update": 1.809, "loss": "0.707", "ntokens": "127.81", "acc_total": "127.81", "n_correct": "113.72", "wer_total": "127.81", "n_error": "14.09", "ppl": "1.63", "accuracy": "88.976", "wer": "11.024", "wps": "70", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "272800", "lr": "3.75734e-05", "gnorm": "2.102", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "774383"} [2024-07-14 23:56:42,388][train_inner][INFO] - {"epoch": 2, "update": 1.81, "loss": "0.734", "ntokens": "127.065", "acc_total": "127.065", "n_correct": "112.775", "wer_total": "127.065", "n_error": "14.285", "ppl": "1.66", "accuracy": "88.754", "wer": "11.242", "wps": "69.9", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "273000", "lr": "3.7461e-05", "gnorm": "2.097", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "774746"} [2024-07-15 00:02:46,193][train_inner][INFO] - {"epoch": 2, "update": 1.812, "loss": "0.752", "ntokens": "126.605", "acc_total": "126.605", "n_correct": "111.875", "wer_total": "126.605", "n_error": "14.73", "ppl": "1.68", "accuracy": "88.365", "wer": "11.635", "wps": "69.6", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "273200", "lr": "3.73489e-05", "gnorm": "2.171", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "775110"} [2024-07-15 00:08:49,944][train_inner][INFO] - {"epoch": 2, "update": 1.813, "loss": "0.685", "ntokens": "127.95", "acc_total": "127.95", "n_correct": "114.25", "wer_total": "127.95", "n_error": "13.7", "ppl": "1.61", "accuracy": "89.293", "wer": "10.707", "wps": "70.4", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "273400", "lr": "3.72372e-05", "gnorm": "2.087", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "775474"} [2024-07-15 00:14:53,890][train_inner][INFO] - {"epoch": 2, "update": 1.814, "loss": "0.744", "ntokens": "126.99", "acc_total": "126.99", "n_correct": "112.555", "wer_total": "126.99", "n_error": "14.435", "ppl": "1.67", "accuracy": "88.633", "wer": "11.367", "wps": "69.8", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "273600", "lr": "3.71258e-05", "gnorm": "2.094", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "775838"} [2024-07-15 00:20:57,913][train_inner][INFO] - {"epoch": 2, "update": 1.816, "loss": "0.72", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "112.38", "wer_total": "126.97", "n_error": "14.59", "ppl": "1.65", "accuracy": "88.509", "wer": "11.491", "wps": "69.8", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "273800", "lr": "3.70148e-05", "gnorm": "2.022", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "776202"} [2024-07-15 00:27:01,808][train_inner][INFO] - {"epoch": 2, "update": 1.817, "loss": "0.7", "ntokens": "126.365", "acc_total": "126.365", "n_correct": "112.485", "wer_total": "126.365", "n_error": "13.88", "ppl": "1.62", "accuracy": "89.016", "wer": "10.984", "wps": "69.5", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "274000", "lr": "3.6904e-05", "gnorm": "2.001", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "776566"} [2024-07-15 00:27:07,209][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-15 00:33:07,803][train_inner][INFO] - {"epoch": 2, "update": 1.818, "loss": "0.697", "ntokens": "127.515", "acc_total": "127.515", "n_correct": "113.725", "wer_total": "127.515", "n_error": "13.78", "ppl": "1.62", "accuracy": "89.186", "wer": "10.807", "wps": "69.7", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "274200", "lr": "3.67937e-05", "gnorm": "2.028", "loss_scale": "512", "train_wall": "365", "gb_free": "6.5", "wall": "776932"} [2024-07-15 00:39:10,777][train_inner][INFO] - {"epoch": 2, "update": 1.82, "loss": "0.704", "ntokens": "126.385", "acc_total": "126.385", "n_correct": "112.47", "wer_total": "126.385", "n_error": "13.915", "ppl": "1.63", "accuracy": "88.99", "wer": "11.01", "wps": "69.6", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "274400", "lr": "3.66836e-05", "gnorm": "2.082", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "777295"} [2024-07-15 00:45:13,999][train_inner][INFO] - {"epoch": 2, "update": 1.821, "loss": "0.707", "ntokens": "127.13", "acc_total": "127.13", "n_correct": "113.255", "wer_total": "127.13", "n_error": "13.87", "ppl": "1.63", "accuracy": "89.086", "wer": "10.91", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "274600", "lr": "3.65739e-05", "gnorm": "2.064", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "777658"} [2024-07-15 00:51:17,254][train_inner][INFO] - {"epoch": 2, "update": 1.822, "loss": "0.688", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "112.945", "wer_total": "126.255", "n_error": "13.305", "ppl": "1.61", "accuracy": "89.458", "wer": "10.538", "wps": "69.5", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "274800", "lr": "3.64645e-05", "gnorm": "1.996", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "778021"} [2024-07-15 00:57:20,545][train_inner][INFO] - {"epoch": 2, "update": 1.824, "loss": "0.729", "ntokens": "126.035", "acc_total": "126.035", "n_correct": "111.705", "wer_total": "126.035", "n_error": "14.33", "ppl": "1.66", "accuracy": "88.63", "wer": "11.37", "wps": "69.4", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "275000", "lr": "3.63554e-05", "gnorm": "2.026", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "778385"} [2024-07-15 00:57:20,546][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 01:38:56,645][valid][INFO] - {"epoch": 2, "valid_loss": "0.623", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.2997", "valid_wer_total": "18.1585", "valid_n_error": "1.85852", "valid_ppl": "1.54", "valid_accuracy": "89.764", "valid_wer": "10.235", "valid_wps": "174.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "275000", "valid_best_accuracy": "89.809"} [2024-07-15 01:38:56,646][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 275000 updates [2024-07-15 01:38:56,646][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_275000.pt [2024-07-15 01:38:59,873][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_275000.pt [2024-07-15 01:39:02,115][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_275000.pt (epoch 2 @ 275000 updates, score 89.764) (writing took 5.469250140944496 seconds) [2024-07-15 01:45:04,909][train_inner][INFO] - {"epoch": 2, "update": 1.825, "loss": "0.721", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "112.925", "wer_total": "127.095", "n_error": "14.16", "ppl": "1.65", "accuracy": "88.851", "wer": "11.141", "wps": "8.9", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "275200", "lr": "3.62466e-05", "gnorm": "2.098", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "781249"} [2024-07-15 01:51:07,880][train_inner][INFO] - {"epoch": 2, "update": 1.826, "loss": "0.7", "ntokens": "128.595", "acc_total": "128.595", "n_correct": "114.805", "wer_total": "128.595", "n_error": "13.79", "ppl": "1.62", "accuracy": "89.276", "wer": "10.724", "wps": "70.9", "ups": "0.55", "wpb": "128.6", "bsz": "8", "num_updates": "275400", "lr": "3.61382e-05", "gnorm": "2.037", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "781612"} [2024-07-15 01:57:11,111][train_inner][INFO] - {"epoch": 2, "update": 1.828, "loss": "0.746", "ntokens": "126.825", "acc_total": "126.825", "n_correct": "112.035", "wer_total": "126.825", "n_error": "14.785", "ppl": "1.68", "accuracy": "88.338", "wer": "11.658", "wps": "69.8", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "275600", "lr": "3.60301e-05", "gnorm": "2.009", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "781975"} [2024-07-15 02:03:14,096][train_inner][INFO] - {"epoch": 2, "update": 1.829, "loss": "0.747", "ntokens": "126.34", "acc_total": "126.34", "n_correct": "112.18", "wer_total": "126.34", "n_error": "14.16", "ppl": "1.68", "accuracy": "88.792", "wer": "11.208", "wps": "69.6", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "275800", "lr": "3.59223e-05", "gnorm": "2.108", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "782338"} [2024-07-15 02:09:17,012][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "0.712", "ntokens": "125.66", "acc_total": "125.66", "n_correct": "111.57", "wer_total": "125.66", "n_error": "14.08", "ppl": "1.64", "accuracy": "88.787", "wer": "11.205", "wps": "69.3", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "276000", "lr": "3.58149e-05", "gnorm": "2.037", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "782701"} [2024-07-15 02:15:20,205][train_inner][INFO] - {"epoch": 2, "update": 1.832, "loss": "0.707", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "112.87", "wer_total": "126.625", "n_error": "13.755", "ppl": "1.63", "accuracy": "89.137", "wer": "10.863", "wps": "69.7", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "276200", "lr": "3.57078e-05", "gnorm": "2.073", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "783064"} [2024-07-15 02:21:23,333][train_inner][INFO] - {"epoch": 2, "update": 1.833, "loss": "0.663", "ntokens": "127.105", "acc_total": "127.105", "n_correct": "113.83", "wer_total": "127.105", "n_error": "13.27", "ppl": "1.58", "accuracy": "89.556", "wer": "10.44", "wps": "70", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "276400", "lr": "3.56009e-05", "gnorm": "2.109", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "783427"} [2024-07-15 02:27:26,142][train_inner][INFO] - {"epoch": 2, "update": 1.834, "loss": "0.716", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "112.7", "wer_total": "126.915", "n_error": "14.21", "ppl": "1.64", "accuracy": "88.8", "wer": "11.196", "wps": "70", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "276600", "lr": "3.54945e-05", "gnorm": "2.125", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "783790"} [2024-07-15 02:33:29,062][train_inner][INFO] - {"epoch": 2, "update": 1.836, "loss": "0.689", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "112.63", "wer_total": "125.995", "n_error": "13.36", "ppl": "1.61", "accuracy": "89.392", "wer": "10.604", "wps": "69.4", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "276800", "lr": "3.53883e-05", "gnorm": "1.91", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "784153"} [2024-07-15 02:39:32,047][train_inner][INFO] - {"epoch": 2, "update": 1.837, "loss": "0.695", "ntokens": "126.34", "acc_total": "126.34", "n_correct": "112.725", "wer_total": "126.34", "n_error": "13.61", "ppl": "1.62", "accuracy": "89.224", "wer": "10.773", "wps": "69.6", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "277000", "lr": "3.52824e-05", "gnorm": "1.979", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "784516"} [2024-07-15 02:45:34,846][train_inner][INFO] - {"epoch": 2, "update": 1.838, "loss": "0.722", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "111.93", "wer_total": "126.01", "n_error": "14.08", "ppl": "1.65", "accuracy": "88.826", "wer": "11.174", "wps": "69.5", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "277200", "lr": "3.51769e-05", "gnorm": "2", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "784879"} [2024-07-15 02:51:37,719][train_inner][INFO] - {"epoch": 2, "update": 1.84, "loss": "0.674", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "113.15", "wer_total": "126.27", "n_error": "13.12", "ppl": "1.6", "accuracy": "89.61", "wer": "10.39", "wps": "69.6", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "277400", "lr": "3.50717e-05", "gnorm": "1.967", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "785242"} [2024-07-15 02:54:39,248][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 03:36:14,007][valid][INFO] - {"epoch": 2, "valid_loss": "0.62", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3142", "valid_wer_total": "18.1585", "valid_n_error": "1.8441", "valid_ppl": "1.54", "valid_accuracy": "89.843", "valid_wer": "10.156", "valid_wps": "174.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "277500", "valid_best_accuracy": "89.843"} [2024-07-15 03:36:14,007][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 277500 updates [2024-07-15 03:36:14,008][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_277500.pt [2024-07-15 03:36:17,235][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_277500.pt [2024-07-15 03:36:22,627][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_277500.pt (epoch 2 @ 277500 updates, score 89.843) (writing took 8.619841936044395 seconds) [2024-07-15 03:39:23,856][train_inner][INFO] - {"epoch": 2, "update": 1.841, "loss": "0.718", "ntokens": "125.925", "acc_total": "125.925", "n_correct": "112.055", "wer_total": "125.925", "n_error": "13.86", "ppl": "1.65", "accuracy": "88.986", "wer": "11.007", "wps": "8.8", "ups": "0.07", "wpb": "125.9", "bsz": "8", "num_updates": "277600", "lr": "3.49668e-05", "gnorm": "2.055", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "788108"} [2024-07-15 03:45:26,504][train_inner][INFO] - {"epoch": 2, "update": 1.842, "loss": "0.707", "ntokens": "125.96", "acc_total": "125.96", "n_correct": "111.79", "wer_total": "125.96", "n_error": "14.17", "ppl": "1.63", "accuracy": "88.75", "wer": "11.25", "wps": "69.5", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "277800", "lr": "3.48622e-05", "gnorm": "2.193", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "788471"} [2024-07-15 03:51:29,399][train_inner][INFO] - {"epoch": 2, "update": 1.844, "loss": "0.697", "ntokens": "126.36", "acc_total": "126.36", "n_correct": "112.635", "wer_total": "126.36", "n_error": "13.725", "ppl": "1.62", "accuracy": "89.138", "wer": "10.862", "wps": "69.6", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "278000", "lr": "3.47579e-05", "gnorm": "2.096", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "788833"} [2024-07-15 03:57:32,273][train_inner][INFO] - {"epoch": 2, "update": 1.845, "loss": "0.725", "ntokens": "125.88", "acc_total": "125.88", "n_correct": "111.725", "wer_total": "125.88", "n_error": "14.15", "ppl": "1.65", "accuracy": "88.755", "wer": "11.241", "wps": "69.4", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "278200", "lr": "3.46539e-05", "gnorm": "2.152", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "789196"} [2024-07-15 04:03:35,004][train_inner][INFO] - {"epoch": 2, "update": 1.846, "loss": "0.675", "ntokens": "127.045", "acc_total": "127.045", "n_correct": "113.435", "wer_total": "127.045", "n_error": "13.61", "ppl": "1.6", "accuracy": "89.287", "wer": "10.713", "wps": "70", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "278400", "lr": "3.45503e-05", "gnorm": "1.929", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "789559"} [2024-07-15 04:09:38,123][train_inner][INFO] - {"epoch": 2, "update": 1.848, "loss": "0.69", "ntokens": "127.555", "acc_total": "127.555", "n_correct": "113.68", "wer_total": "127.555", "n_error": "13.875", "ppl": "1.61", "accuracy": "89.122", "wer": "10.878", "wps": "70.3", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "278600", "lr": "3.44469e-05", "gnorm": "2.019", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "789922"} [2024-07-15 04:15:40,773][train_inner][INFO] - {"epoch": 2, "update": 1.849, "loss": "0.688", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "112.84", "wer_total": "126.515", "n_error": "13.67", "ppl": "1.61", "accuracy": "89.191", "wer": "10.805", "wps": "69.8", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "278800", "lr": "3.43439e-05", "gnorm": "2.048", "loss_scale": "2048", "train_wall": "362", "gb_free": "6.5", "wall": "790285"} [2024-07-15 04:21:30,821][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 04:21:45,356][train_inner][INFO] - {"epoch": 2, "update": 1.85, "loss": "0.694", "ntokens": "126.08", "acc_total": "126.08", "n_correct": "112.275", "wer_total": "126.08", "n_error": "13.8", "ppl": "1.62", "accuracy": "89.051", "wer": "10.945", "wps": "69.2", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "279000", "lr": "3.42411e-05", "gnorm": "2.065", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "790649"} [2024-07-15 04:22:45,315][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 512.0 [2024-07-15 04:27:50,252][train_inner][INFO] - {"epoch": 2, "update": 1.852, "loss": "0.689", "ntokens": "128.77", "acc_total": "128.77", "n_correct": "114.68", "wer_total": "128.77", "n_error": "14.09", "ppl": "1.61", "accuracy": "89.058", "wer": "10.942", "wps": "70.6", "ups": "0.55", "wpb": "128.8", "bsz": "8", "num_updates": "279200", "lr": "3.41387e-05", "gnorm": "2.076", "loss_scale": "512", "train_wall": "364", "gb_free": "6.5", "wall": "791014"} [2024-07-15 04:33:53,014][train_inner][INFO] - {"epoch": 2, "update": 1.853, "loss": "0.732", "ntokens": "126.62", "acc_total": "126.62", "n_correct": "112.24", "wer_total": "126.62", "n_error": "14.38", "ppl": "1.66", "accuracy": "88.643", "wer": "11.357", "wps": "69.8", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "279400", "lr": "3.40366e-05", "gnorm": "2.002", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "791377"} [2024-07-15 04:39:55,592][train_inner][INFO] - {"epoch": 2, "update": 1.854, "loss": "0.694", "ntokens": "127.555", "acc_total": "127.555", "n_correct": "113.735", "wer_total": "127.555", "n_error": "13.82", "ppl": "1.62", "accuracy": "89.165", "wer": "10.835", "wps": "70.4", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "279600", "lr": "3.39348e-05", "gnorm": "2.017", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "791740"} [2024-07-15 04:45:58,269][train_inner][INFO] - {"epoch": 2, "update": 1.856, "loss": "0.713", "ntokens": "126.945", "acc_total": "126.945", "n_correct": "112.77", "wer_total": "126.945", "n_error": "14.175", "ppl": "1.64", "accuracy": "88.834", "wer": "11.166", "wps": "70", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "279800", "lr": "3.38333e-05", "gnorm": "2.006", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "792102"} [2024-07-15 04:52:01,087][train_inner][INFO] - {"epoch": 2, "update": 1.857, "loss": "0.715", "ntokens": "126.82", "acc_total": "126.82", "n_correct": "112.975", "wer_total": "126.82", "n_error": "13.845", "ppl": "1.64", "accuracy": "89.083", "wer": "10.917", "wps": "69.9", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "280000", "lr": "3.37321e-05", "gnorm": "1.998", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "792465"} [2024-07-15 04:52:01,088][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 05:33:34,211][valid][INFO] - {"epoch": 2, "valid_loss": "0.619", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3138", "valid_wer_total": "18.1585", "valid_n_error": "1.8446", "valid_ppl": "1.54", "valid_accuracy": "89.841", "valid_wer": "10.158", "valid_wps": "174.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "280000", "valid_best_accuracy": "89.843"} [2024-07-15 05:33:34,211][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 280000 updates [2024-07-15 05:33:34,212][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_280000.pt [2024-07-15 05:33:37,434][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_280000.pt [2024-07-15 05:33:39,586][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_280000.pt (epoch 2 @ 280000 updates, score 89.841) (writing took 5.374397592968307 seconds) [2024-07-15 05:39:42,325][train_inner][INFO] - {"epoch": 2, "update": 1.858, "loss": "0.708", "ntokens": "128.02", "acc_total": "128.02", "n_correct": "113.395", "wer_total": "128.02", "n_error": "14.625", "ppl": "1.63", "accuracy": "88.576", "wer": "11.424", "wps": "8.9", "ups": "0.07", "wpb": "128", "bsz": "8", "num_updates": "280200", "lr": "3.36312e-05", "gnorm": "2.031", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "795326"} [2024-07-15 05:45:45,090][train_inner][INFO] - {"epoch": 2, "update": 1.859, "loss": "0.72", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "112.86", "wer_total": "126.975", "n_error": "14.115", "ppl": "1.65", "accuracy": "88.884", "wer": "11.116", "wps": "70", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "280400", "lr": "3.35306e-05", "gnorm": "1.959", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "795689"} [2024-07-15 05:51:48,031][train_inner][INFO] - {"epoch": 2, "update": 1.861, "loss": "0.712", "ntokens": "126.285", "acc_total": "126.285", "n_correct": "112.26", "wer_total": "126.285", "n_error": "14.025", "ppl": "1.64", "accuracy": "88.894", "wer": "11.106", "wps": "69.6", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "280600", "lr": "3.34303e-05", "gnorm": "1.992", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "796052"} [2024-07-15 05:57:51,472][train_inner][INFO] - {"epoch": 2, "update": 1.862, "loss": "0.752", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "112.78", "wer_total": "127.17", "n_error": "14.39", "ppl": "1.68", "accuracy": "88.684", "wer": "11.316", "wps": "70", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "280800", "lr": "3.33303e-05", "gnorm": "2.102", "loss_scale": "512", "train_wall": "363", "gb_free": "6.5", "wall": "796416"} [2024-07-15 06:03:54,543][train_inner][INFO] - {"epoch": 2, "update": 1.863, "loss": "0.7", "ntokens": "126.71", "acc_total": "126.71", "n_correct": "113.03", "wer_total": "126.71", "n_error": "13.68", "ppl": "1.62", "accuracy": "89.204", "wer": "10.796", "wps": "69.8", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "281000", "lr": "3.32306e-05", "gnorm": "2.04", "loss_scale": "512", "train_wall": "362", "gb_free": "6.5", "wall": "796779"} [2024-07-15 06:09:57,420][train_inner][INFO] - {"epoch": 2, "update": 1.865, "loss": "0.68", "ntokens": "127.805", "acc_total": "127.805", "n_correct": "113.95", "wer_total": "127.805", "n_error": "13.855", "ppl": "1.6", "accuracy": "89.159", "wer": "10.841", "wps": "70.4", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "281200", "lr": "3.31312e-05", "gnorm": "1.98", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "797141"} [2024-07-15 06:16:00,124][train_inner][INFO] - {"epoch": 2, "update": 1.866, "loss": "0.719", "ntokens": "126.91", "acc_total": "126.91", "n_correct": "112.86", "wer_total": "126.91", "n_error": "14.05", "ppl": "1.65", "accuracy": "88.929", "wer": "11.071", "wps": "70", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "281400", "lr": "3.30321e-05", "gnorm": "2.046", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "797504"} [2024-07-15 06:22:02,676][train_inner][INFO] - {"epoch": 2, "update": 1.867, "loss": "0.69", "ntokens": "125.68", "acc_total": "125.68", "n_correct": "112.215", "wer_total": "125.68", "n_error": "13.465", "ppl": "1.61", "accuracy": "89.286", "wer": "10.714", "wps": "69.3", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "281600", "lr": "3.29333e-05", "gnorm": "2.089", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "797867"} [2024-07-15 06:28:05,645][train_inner][INFO] - {"epoch": 2, "update": 1.869, "loss": "0.7", "ntokens": "126.175", "acc_total": "126.175", "n_correct": "112.51", "wer_total": "126.175", "n_error": "13.665", "ppl": "1.62", "accuracy": "89.17", "wer": "10.83", "wps": "69.5", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "281800", "lr": "3.28348e-05", "gnorm": "2.106", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "798230"} [2024-07-15 06:34:09,123][train_inner][INFO] - {"epoch": 2, "update": 1.87, "loss": "0.727", "ntokens": "127.03", "acc_total": "127.03", "n_correct": "112.88", "wer_total": "127.03", "n_error": "14.15", "ppl": "1.66", "accuracy": "88.861", "wer": "11.139", "wps": "69.9", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "282000", "lr": "3.27365e-05", "gnorm": "2.126", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "798593"} [2024-07-15 06:40:12,000][train_inner][INFO] - {"epoch": 2, "update": 1.871, "loss": "0.704", "ntokens": "126.51", "acc_total": "126.51", "n_correct": "112.725", "wer_total": "126.51", "n_error": "13.785", "ppl": "1.63", "accuracy": "89.104", "wer": "10.896", "wps": "69.7", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "282200", "lr": "3.26386e-05", "gnorm": "2.019", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "798956"} [2024-07-15 06:46:14,607][train_inner][INFO] - {"epoch": 2, "update": 1.873, "loss": "0.686", "ntokens": "125.875", "acc_total": "125.875", "n_correct": "112.12", "wer_total": "125.875", "n_error": "13.755", "ppl": "1.61", "accuracy": "89.072", "wer": "10.928", "wps": "69.4", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "282400", "lr": "3.2541e-05", "gnorm": "1.991", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "799319"} [2024-07-15 06:49:15,938][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 07:30:56,601][valid][INFO] - {"epoch": 2, "valid_loss": "0.617", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3123", "valid_wer_total": "18.1585", "valid_n_error": "1.84598", "valid_ppl": "1.53", "valid_accuracy": "89.833", "valid_wer": "10.166", "valid_wps": "174.2", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "282500", "valid_best_accuracy": "89.843"} [2024-07-15 07:30:56,601][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 282500 updates [2024-07-15 07:30:56,602][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_282500.pt [2024-07-15 07:30:59,846][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_282500.pt [2024-07-15 07:31:02,036][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_282500.pt (epoch 2 @ 282500 updates, score 89.833) (writing took 5.434740911005065 seconds) [2024-07-15 07:34:03,897][train_inner][INFO] - {"epoch": 2, "update": 1.874, "loss": "0.685", "ntokens": "127.075", "acc_total": "127.075", "n_correct": "113.435", "wer_total": "127.075", "n_error": "13.64", "ppl": "1.61", "accuracy": "89.266", "wer": "10.734", "wps": "8.9", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "282600", "lr": "3.24436e-05", "gnorm": "2.057", "loss_scale": "1024", "train_wall": "362", "gb_free": "6.5", "wall": "802188"} [2024-07-15 07:40:07,956][train_inner][INFO] - {"epoch": 2, "update": 1.875, "loss": "0.731", "ntokens": "125.72", "acc_total": "125.72", "n_correct": "111.865", "wer_total": "125.72", "n_error": "13.855", "ppl": "1.66", "accuracy": "88.979", "wer": "11.021", "wps": "69.1", "ups": "0.55", "wpb": "125.7", "bsz": "8", "num_updates": "282800", "lr": "3.23466e-05", "gnorm": "2.08", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "802552"} [2024-07-15 07:46:12,069][train_inner][INFO] - {"epoch": 2, "update": 1.877, "loss": "0.721", "ntokens": "126.885", "acc_total": "126.885", "n_correct": "112.655", "wer_total": "126.885", "n_error": "14.23", "ppl": "1.65", "accuracy": "88.785", "wer": "11.215", "wps": "69.7", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "283000", "lr": "3.22498e-05", "gnorm": "2.137", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "802916"} [2024-07-15 07:52:16,273][train_inner][INFO] - {"epoch": 2, "update": 1.878, "loss": "0.702", "ntokens": "127.605", "acc_total": "127.605", "n_correct": "113.52", "wer_total": "127.605", "n_error": "14.085", "ppl": "1.63", "accuracy": "88.962", "wer": "11.038", "wps": "70.1", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "283200", "lr": "3.21534e-05", "gnorm": "1.951", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "803280"} [2024-07-15 07:56:41,898][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 07:58:21,964][train_inner][INFO] - {"epoch": 2, "update": 1.879, "loss": "0.706", "ntokens": "125.42", "acc_total": "125.42", "n_correct": "111.64", "wer_total": "125.42", "n_error": "13.78", "ppl": "1.63", "accuracy": "89.013", "wer": "10.987", "wps": "68.6", "ups": "0.55", "wpb": "125.4", "bsz": "8", "num_updates": "283400", "lr": "3.20572e-05", "gnorm": "2.035", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "803646"} [2024-07-15 08:04:26,133][train_inner][INFO] - {"epoch": 2, "update": 1.881, "loss": "0.689", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "113.04", "wer_total": "126.76", "n_error": "13.72", "ppl": "1.61", "accuracy": "89.176", "wer": "10.824", "wps": "69.6", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "283600", "lr": "3.19613e-05", "gnorm": "2.076", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "804010"} [2024-07-15 08:10:30,108][train_inner][INFO] - {"epoch": 2, "update": 1.882, "loss": "0.702", "ntokens": "126.885", "acc_total": "126.885", "n_correct": "112.91", "wer_total": "126.885", "n_error": "13.975", "ppl": "1.63", "accuracy": "88.986", "wer": "11.014", "wps": "69.7", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "283800", "lr": "3.18657e-05", "gnorm": "2.008", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "804374"} [2024-07-15 08:16:34,279][train_inner][INFO] - {"epoch": 2, "update": 1.883, "loss": "0.69", "ntokens": "127.745", "acc_total": "127.745", "n_correct": "113.635", "wer_total": "127.745", "n_error": "14.11", "ppl": "1.61", "accuracy": "88.955", "wer": "11.045", "wps": "70.2", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "284000", "lr": "3.17704e-05", "gnorm": "2.029", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "804738"} [2024-07-15 08:22:38,483][train_inner][INFO] - {"epoch": 2, "update": 1.885, "loss": "0.699", "ntokens": "126.985", "acc_total": "126.985", "n_correct": "113.175", "wer_total": "126.985", "n_error": "13.81", "ppl": "1.62", "accuracy": "89.125", "wer": "10.875", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "284200", "lr": "3.16753e-05", "gnorm": "2.013", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "805103"} [2024-07-15 08:28:42,756][train_inner][INFO] - {"epoch": 2, "update": 1.886, "loss": "0.706", "ntokens": "126.36", "acc_total": "126.36", "n_correct": "112.45", "wer_total": "126.36", "n_error": "13.91", "ppl": "1.63", "accuracy": "88.992", "wer": "11.008", "wps": "69.4", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "284400", "lr": "3.15806e-05", "gnorm": "2.012", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "805467"} [2024-07-15 08:34:47,075][train_inner][INFO] - {"epoch": 2, "update": 1.887, "loss": "0.665", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "113.765", "wer_total": "127.1", "n_error": "13.335", "ppl": "1.59", "accuracy": "89.508", "wer": "10.492", "wps": "69.8", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "284600", "lr": "3.14861e-05", "gnorm": "1.983", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "805831"} [2024-07-15 08:40:51,158][train_inner][INFO] - {"epoch": 2, "update": 1.889, "loss": "0.701", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "112.475", "wer_total": "126.555", "n_error": "14.07", "ppl": "1.63", "accuracy": "88.874", "wer": "11.118", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "284800", "lr": "3.13919e-05", "gnorm": "2.069", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "806195"} [2024-07-15 08:46:55,596][train_inner][INFO] - {"epoch": 2, "update": 1.89, "loss": "0.718", "ntokens": "126.315", "acc_total": "126.315", "n_correct": "112.285", "wer_total": "126.315", "n_error": "14.03", "ppl": "1.64", "accuracy": "88.893", "wer": "11.107", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "285000", "lr": "3.1298e-05", "gnorm": "2.016", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "806560"} [2024-07-15 08:46:55,596][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 09:28:39,601][valid][INFO] - {"epoch": 2, "valid_loss": "0.616", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.322", "valid_wer_total": "18.1585", "valid_n_error": "1.83631", "valid_ppl": "1.53", "valid_accuracy": "89.886", "valid_wer": "10.113", "valid_wps": "174", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "285000", "valid_best_accuracy": "89.886"} [2024-07-15 09:28:39,601][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 285000 updates [2024-07-15 09:28:39,602][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_285000.pt [2024-07-15 09:28:42,972][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_285000.pt [2024-07-15 09:28:48,371][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_285000.pt (epoch 2 @ 285000 updates, score 89.886) (writing took 8.769922064850107 seconds) [2024-07-15 09:34:52,679][train_inner][INFO] - {"epoch": 2, "update": 1.891, "loss": "0.675", "ntokens": "126.17", "acc_total": "126.17", "n_correct": "112.905", "wer_total": "126.17", "n_error": "13.265", "ppl": "1.6", "accuracy": "89.486", "wer": "10.514", "wps": "8.8", "ups": "0.07", "wpb": "126.2", "bsz": "8", "num_updates": "285200", "lr": "3.12044e-05", "gnorm": "1.902", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "809437"} [2024-07-15 09:40:57,221][train_inner][INFO] - {"epoch": 2, "update": 1.893, "loss": "0.717", "ntokens": "126.955", "acc_total": "126.955", "n_correct": "112.94", "wer_total": "126.955", "n_error": "14.015", "ppl": "1.64", "accuracy": "88.961", "wer": "11.039", "wps": "69.7", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "285400", "lr": "3.11111e-05", "gnorm": "1.954", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "809801"} [2024-07-15 09:44:06,810][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 09:47:03,845][train_inner][INFO] - {"epoch": 2, "update": 1.894, "loss": "0.674", "ntokens": "127.06", "acc_total": "127.06", "n_correct": "113.775", "wer_total": "127.06", "n_error": "13.285", "ppl": "1.6", "accuracy": "89.544", "wer": "10.456", "wps": "69.3", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "285600", "lr": "3.1018e-05", "gnorm": "1.946", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "810168"} [2024-07-15 09:53:08,793][train_inner][INFO] - {"epoch": 2, "update": 1.895, "loss": "0.721", "ntokens": "126.135", "acc_total": "126.135", "n_correct": "112.065", "wer_total": "126.135", "n_error": "14.07", "ppl": "1.65", "accuracy": "88.845", "wer": "11.155", "wps": "69.1", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "285800", "lr": "3.09252e-05", "gnorm": "2.092", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "810533"} [2024-07-15 09:59:13,587][train_inner][INFO] - {"epoch": 2, "update": 1.897, "loss": "0.703", "ntokens": "127.315", "acc_total": "127.315", "n_correct": "113.465", "wer_total": "127.315", "n_error": "13.85", "ppl": "1.63", "accuracy": "89.121", "wer": "10.879", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "286000", "lr": "3.08327e-05", "gnorm": "1.946", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "810898"} [2024-07-15 10:05:18,357][train_inner][INFO] - {"epoch": 2, "update": 1.898, "loss": "0.722", "ntokens": "127.66", "acc_total": "127.66", "n_correct": "113.21", "wer_total": "127.66", "n_error": "14.45", "ppl": "1.65", "accuracy": "88.681", "wer": "11.319", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "286200", "lr": "3.07405e-05", "gnorm": "2.027", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "811262"} [2024-07-15 10:11:23,283][train_inner][INFO] - {"epoch": 2, "update": 1.899, "loss": "0.698", "ntokens": "127", "acc_total": "127", "n_correct": "113.165", "wer_total": "127", "n_error": "13.835", "ppl": "1.62", "accuracy": "89.106", "wer": "10.894", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "286400", "lr": "3.06486e-05", "gnorm": "2.09", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "811627"} [2024-07-15 10:17:28,214][train_inner][INFO] - {"epoch": 2, "update": 1.901, "loss": "0.676", "ntokens": "125.595", "acc_total": "125.595", "n_correct": "112.205", "wer_total": "125.595", "n_error": "13.39", "ppl": "1.6", "accuracy": "89.339", "wer": "10.661", "wps": "68.8", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "286600", "lr": "3.05569e-05", "gnorm": "2.032", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "811992"} [2024-07-15 10:23:32,983][train_inner][INFO] - {"epoch": 2, "update": 1.902, "loss": "0.687", "ntokens": "126.685", "acc_total": "126.685", "n_correct": "113.2", "wer_total": "126.685", "n_error": "13.485", "ppl": "1.61", "accuracy": "89.355", "wer": "10.645", "wps": "69.5", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "286800", "lr": "3.04655e-05", "gnorm": "1.976", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "812357"} [2024-07-15 10:29:37,914][train_inner][INFO] - {"epoch": 2, "update": 1.903, "loss": "0.688", "ntokens": "125.99", "acc_total": "125.99", "n_correct": "112.545", "wer_total": "125.99", "n_error": "13.445", "ppl": "1.61", "accuracy": "89.329", "wer": "10.671", "wps": "69", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "287000", "lr": "3.03743e-05", "gnorm": "1.984", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "812722"} [2024-07-15 10:35:42,925][train_inner][INFO] - {"epoch": 2, "update": 1.905, "loss": "0.67", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "113.915", "wer_total": "127.33", "n_error": "13.415", "ppl": "1.59", "accuracy": "89.464", "wer": "10.536", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "287200", "lr": "3.02835e-05", "gnorm": "1.924", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "813087"} [2024-07-15 10:41:47,833][train_inner][INFO] - {"epoch": 2, "update": 1.906, "loss": "0.724", "ntokens": "126.02", "acc_total": "126.02", "n_correct": "111.945", "wer_total": "126.02", "n_error": "14.07", "ppl": "1.65", "accuracy": "88.831", "wer": "11.165", "wps": "69.1", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "287400", "lr": "3.01929e-05", "gnorm": "2.031", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "813452"} [2024-07-15 10:44:50,227][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 11:26:39,318][valid][INFO] - {"epoch": 2, "valid_loss": "0.618", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3122", "valid_wer_total": "18.1585", "valid_n_error": "1.84623", "valid_ppl": "1.53", "valid_accuracy": "89.832", "valid_wer": "10.167", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "287500", "valid_best_accuracy": "89.886"} [2024-07-15 11:26:39,319][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 287500 updates [2024-07-15 11:26:39,319][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_287500.pt [2024-07-15 11:26:42,564][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_287500.pt [2024-07-15 11:26:44,684][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_287500.pt (epoch 2 @ 287500 updates, score 89.832) (writing took 5.365192304830998 seconds) [2024-07-15 11:29:47,091][train_inner][INFO] - {"epoch": 2, "update": 1.907, "loss": "0.688", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "112.675", "wer_total": "126.255", "n_error": "13.58", "ppl": "1.61", "accuracy": "89.244", "wer": "10.756", "wps": "8.8", "ups": "0.07", "wpb": "126.3", "bsz": "8", "num_updates": "287600", "lr": "3.01026e-05", "gnorm": "2.096", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "816331"} [2024-07-15 11:35:52,258][train_inner][INFO] - {"epoch": 2, "update": 1.909, "loss": "0.681", "ntokens": "126.815", "acc_total": "126.815", "n_correct": "113.115", "wer_total": "126.815", "n_error": "13.7", "ppl": "1.6", "accuracy": "89.197", "wer": "10.803", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "287800", "lr": "3.00125e-05", "gnorm": "1.966", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "816696"} [2024-07-15 11:41:57,482][train_inner][INFO] - {"epoch": 2, "update": 1.91, "loss": "0.696", "ntokens": "126.57", "acc_total": "126.57", "n_correct": "112.645", "wer_total": "126.57", "n_error": "13.925", "ppl": "1.62", "accuracy": "88.998", "wer": "11.002", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "288000", "lr": "2.99228e-05", "gnorm": "1.937", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "817062"} [2024-07-15 11:47:49,853][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 11:48:04,415][train_inner][INFO] - {"epoch": 2, "update": 1.911, "loss": "0.711", "ntokens": "127.045", "acc_total": "127.045", "n_correct": "113.07", "wer_total": "127.045", "n_error": "13.975", "ppl": "1.64", "accuracy": "89", "wer": "11", "wps": "69.2", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "288200", "lr": "2.98333e-05", "gnorm": "2.049", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "817428"} [2024-07-15 11:54:09,502][train_inner][INFO] - {"epoch": 2, "update": 1.913, "loss": "0.686", "ntokens": "128.045", "acc_total": "128.045", "n_correct": "114.235", "wer_total": "128.045", "n_error": "13.805", "ppl": "1.61", "accuracy": "89.215", "wer": "10.781", "wps": "70.1", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "288400", "lr": "2.9744e-05", "gnorm": "1.94", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "817794"} [2024-07-15 12:00:14,829][train_inner][INFO] - {"epoch": 2, "update": 1.914, "loss": "0.731", "ntokens": "126.505", "acc_total": "126.505", "n_correct": "112.195", "wer_total": "126.505", "n_error": "14.305", "ppl": "1.66", "accuracy": "88.688", "wer": "11.308", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "288600", "lr": "2.96551e-05", "gnorm": "2.025", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "818159"} [2024-07-15 12:06:20,019][train_inner][INFO] - {"epoch": 2, "update": 1.915, "loss": "0.7", "ntokens": "127.47", "acc_total": "127.47", "n_correct": "113.865", "wer_total": "127.47", "n_error": "13.605", "ppl": "1.62", "accuracy": "89.327", "wer": "10.673", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "288800", "lr": "2.95663e-05", "gnorm": "1.956", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "818524"} [2024-07-15 12:12:25,120][train_inner][INFO] - {"epoch": 2, "update": 1.917, "loss": "0.69", "ntokens": "126.24", "acc_total": "126.24", "n_correct": "112.67", "wer_total": "126.24", "n_error": "13.565", "ppl": "1.61", "accuracy": "89.251", "wer": "10.745", "wps": "69.2", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "289000", "lr": "2.94779e-05", "gnorm": "1.932", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "818889"} [2024-07-15 12:18:30,317][train_inner][INFO] - {"epoch": 2, "update": 1.918, "loss": "0.708", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "112.57", "wer_total": "126.555", "n_error": "13.985", "ppl": "1.63", "accuracy": "88.949", "wer": "11.051", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "289200", "lr": "2.93897e-05", "gnorm": "2.114", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "819254"} [2024-07-15 12:24:35,493][train_inner][INFO] - {"epoch": 2, "update": 1.919, "loss": "0.705", "ntokens": "127.2", "acc_total": "127.2", "n_correct": "113.735", "wer_total": "127.2", "n_error": "13.465", "ppl": "1.63", "accuracy": "89.414", "wer": "10.586", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "289400", "lr": "2.93018e-05", "gnorm": "2.128", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "819620"} [2024-07-15 12:30:40,573][train_inner][INFO] - {"epoch": 2, "update": 1.92, "loss": "0.742", "ntokens": "126.84", "acc_total": "126.84", "n_correct": "112.345", "wer_total": "126.84", "n_error": "14.49", "ppl": "1.67", "accuracy": "88.572", "wer": "11.424", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "289600", "lr": "2.92142e-05", "gnorm": "2.072", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "819985"} [2024-07-15 12:36:45,847][train_inner][INFO] - {"epoch": 2, "update": 1.922, "loss": "0.693", "ntokens": "126.175", "acc_total": "126.175", "n_correct": "112.72", "wer_total": "126.175", "n_error": "13.45", "ppl": "1.62", "accuracy": "89.336", "wer": "10.66", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "289800", "lr": "2.91268e-05", "gnorm": "2.044", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "820350"} [2024-07-15 12:42:51,126][train_inner][INFO] - {"epoch": 2, "update": 1.923, "loss": "0.703", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "113.2", "wer_total": "127.155", "n_error": "13.955", "ppl": "1.63", "accuracy": "89.025", "wer": "10.975", "wps": "69.6", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "290000", "lr": "2.90397e-05", "gnorm": "2.002", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "820715"} [2024-07-15 12:42:51,127][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 13:24:43,499][valid][INFO] - {"epoch": 2, "valid_loss": "0.616", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3174", "valid_wer_total": "18.1585", "valid_n_error": "1.84089", "valid_ppl": "1.53", "valid_accuracy": "89.861", "valid_wer": "10.138", "valid_wps": "173.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "290000", "valid_best_accuracy": "89.886"} [2024-07-15 13:24:43,500][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 290000 updates [2024-07-15 13:24:43,500][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_290000.pt [2024-07-15 13:24:46,740][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_290000.pt [2024-07-15 13:24:49,068][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_290000.pt (epoch 2 @ 290000 updates, score 89.861) (writing took 5.568329364992678 seconds) [2024-07-15 13:30:54,001][train_inner][INFO] - {"epoch": 2, "update": 1.924, "loss": "0.708", "ntokens": "126.76", "acc_total": "126.76", "n_correct": "112.94", "wer_total": "126.76", "n_error": "13.82", "ppl": "1.63", "accuracy": "89.098", "wer": "10.902", "wps": "8.8", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "290200", "lr": "2.89528e-05", "gnorm": "1.97", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "823598"} [2024-07-15 13:36:30,152][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 13:37:01,140][train_inner][INFO] - {"epoch": 2, "update": 1.926, "loss": "0.746", "ntokens": "126.655", "acc_total": "126.655", "n_correct": "111.97", "wer_total": "126.655", "n_error": "14.685", "ppl": "1.68", "accuracy": "88.406", "wer": "11.594", "wps": "69", "ups": "0.54", "wpb": "126.7", "bsz": "8", "num_updates": "290400", "lr": "2.88662e-05", "gnorm": "2.086", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "823965"} [2024-07-15 13:43:06,697][train_inner][INFO] - {"epoch": 2, "update": 1.927, "loss": "0.681", "ntokens": "127.435", "acc_total": "127.435", "n_correct": "113.855", "wer_total": "127.435", "n_error": "13.58", "ppl": "1.6", "accuracy": "89.344", "wer": "10.656", "wps": "69.7", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "290600", "lr": "2.87798e-05", "gnorm": "1.979", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "824331"} [2024-07-15 13:49:11,814][train_inner][INFO] - {"epoch": 2, "update": 1.928, "loss": "0.711", "ntokens": "126.975", "acc_total": "126.975", "n_correct": "112.595", "wer_total": "126.975", "n_error": "14.38", "ppl": "1.64", "accuracy": "88.675", "wer": "11.325", "wps": "69.6", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "290800", "lr": "2.86938e-05", "gnorm": "2.043", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "824696"} [2024-07-15 13:55:17,120][train_inner][INFO] - {"epoch": 2, "update": 1.93, "loss": "0.743", "ntokens": "125.925", "acc_total": "125.925", "n_correct": "111.665", "wer_total": "125.925", "n_error": "14.255", "ppl": "1.67", "accuracy": "88.676", "wer": "11.32", "wps": "68.9", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "291000", "lr": "2.86079e-05", "gnorm": "2.185", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "825061"} [2024-07-15 14:01:21,829][train_inner][INFO] - {"epoch": 2, "update": 1.931, "loss": "0.708", "ntokens": "127.265", "acc_total": "127.265", "n_correct": "113.335", "wer_total": "127.265", "n_error": "13.925", "ppl": "1.63", "accuracy": "89.054", "wer": "10.942", "wps": "69.8", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "291200", "lr": "2.85224e-05", "gnorm": "2.028", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "825426"} [2024-07-15 14:07:26,876][train_inner][INFO] - {"epoch": 2, "update": 1.932, "loss": "0.712", "ntokens": "125.83", "acc_total": "125.83", "n_correct": "111.89", "wer_total": "125.83", "n_error": "13.94", "ppl": "1.64", "accuracy": "88.922", "wer": "11.078", "wps": "68.9", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "291400", "lr": "2.8437e-05", "gnorm": "2.046", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "825791"} [2024-07-15 14:13:32,232][train_inner][INFO] - {"epoch": 2, "update": 1.934, "loss": "0.703", "ntokens": "126.74", "acc_total": "126.74", "n_correct": "113.13", "wer_total": "126.74", "n_error": "13.61", "ppl": "1.63", "accuracy": "89.261", "wer": "10.739", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "291600", "lr": "2.8352e-05", "gnorm": "2.102", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "826156"} [2024-07-15 14:19:37,577][train_inner][INFO] - {"epoch": 2, "update": 1.935, "loss": "0.72", "ntokens": "127.685", "acc_total": "127.685", "n_correct": "113.295", "wer_total": "127.685", "n_error": "14.39", "ppl": "1.65", "accuracy": "88.73", "wer": "11.27", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "291800", "lr": "2.82672e-05", "gnorm": "2.058", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "826522"} [2024-07-15 14:25:43,069][train_inner][INFO] - {"epoch": 2, "update": 1.936, "loss": "0.693", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "113.665", "wer_total": "127.44", "n_error": "13.77", "ppl": "1.62", "accuracy": "89.191", "wer": "10.805", "wps": "69.7", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "292000", "lr": "2.81826e-05", "gnorm": "1.975", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "826887"} [2024-07-15 14:31:48,201][train_inner][INFO] - {"epoch": 2, "update": 1.938, "loss": "0.718", "ntokens": "126.35", "acc_total": "126.35", "n_correct": "112.16", "wer_total": "126.35", "n_error": "14.19", "ppl": "1.65", "accuracy": "88.769", "wer": "11.231", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "292200", "lr": "2.80983e-05", "gnorm": "1.964", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "827252"} [2024-07-15 14:37:53,552][train_inner][INFO] - {"epoch": 2, "update": 1.939, "loss": "0.703", "ntokens": "125.86", "acc_total": "125.86", "n_correct": "112.105", "wer_total": "125.86", "n_error": "13.75", "ppl": "1.63", "accuracy": "89.071", "wer": "10.925", "wps": "68.9", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "292400", "lr": "2.80143e-05", "gnorm": "2.022", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "827618"} [2024-07-15 14:40:56,240][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 15:22:45,838][valid][INFO] - {"epoch": 2, "valid_loss": "0.612", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3296", "valid_wer_total": "18.1585", "valid_n_error": "1.82876", "valid_ppl": "1.53", "valid_accuracy": "89.928", "valid_wer": "10.071", "valid_wps": "173.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "292500", "valid_best_accuracy": "89.928"} [2024-07-15 15:22:45,839][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 292500 updates [2024-07-15 15:22:45,839][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_292500.pt [2024-07-15 15:22:49,046][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_292500.pt [2024-07-15 15:22:53,164][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_292500.pt (epoch 2 @ 292500 updates, score 89.928) (writing took 7.325585006037727 seconds) [2024-07-15 15:25:55,367][train_inner][INFO] - {"epoch": 2, "update": 1.94, "loss": "0.694", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "113.365", "wer_total": "127.145", "n_error": "13.78", "ppl": "1.62", "accuracy": "89.162", "wer": "10.838", "wps": "8.8", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "292600", "lr": "2.79305e-05", "gnorm": "2.011", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "830499"} [2024-07-15 15:32:00,280][train_inner][INFO] - {"epoch": 2, "update": 1.942, "loss": "0.705", "ntokens": "126.845", "acc_total": "126.845", "n_correct": "113.015", "wer_total": "126.845", "n_error": "13.83", "ppl": "1.63", "accuracy": "89.097", "wer": "10.903", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "292800", "lr": "2.78469e-05", "gnorm": "2.06", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "830864"} [2024-07-15 15:38:05,536][train_inner][INFO] - {"epoch": 2, "update": 1.943, "loss": "0.731", "ntokens": "126.375", "acc_total": "126.375", "n_correct": "112.15", "wer_total": "126.375", "n_error": "14.225", "ppl": "1.66", "accuracy": "88.744", "wer": "11.256", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "293000", "lr": "2.77636e-05", "gnorm": "2.158", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "831230"} [2024-07-15 15:44:11,058][train_inner][INFO] - {"epoch": 2, "update": 1.944, "loss": "0.688", "ntokens": "127.315", "acc_total": "127.315", "n_correct": "113.585", "wer_total": "127.315", "n_error": "13.725", "ppl": "1.61", "accuracy": "89.216", "wer": "10.78", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "293200", "lr": "2.76806e-05", "gnorm": "2.027", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "831595"} [2024-07-15 15:50:16,815][train_inner][INFO] - {"epoch": 2, "update": 1.946, "loss": "0.663", "ntokens": "127.34", "acc_total": "127.34", "n_correct": "113.64", "wer_total": "127.34", "n_error": "13.7", "ppl": "1.58", "accuracy": "89.241", "wer": "10.759", "wps": "69.6", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "293400", "lr": "2.75978e-05", "gnorm": "1.989", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "831961"} [2024-07-15 15:56:22,185][train_inner][INFO] - {"epoch": 2, "update": 1.947, "loss": "0.681", "ntokens": "127.325", "acc_total": "127.325", "n_correct": "114.03", "wer_total": "127.325", "n_error": "13.295", "ppl": "1.6", "accuracy": "89.558", "wer": "10.442", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "293600", "lr": "2.75152e-05", "gnorm": "1.971", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "832326"} [2024-07-15 15:58:00,950][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 16:02:29,588][train_inner][INFO] - {"epoch": 2, "update": 1.948, "loss": "0.73", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "112.655", "wer_total": "127.015", "n_error": "14.36", "ppl": "1.66", "accuracy": "88.694", "wer": "11.306", "wps": "69.1", "ups": "0.54", "wpb": "127", "bsz": "8", "num_updates": "293800", "lr": "2.74329e-05", "gnorm": "2.118", "loss_scale": "1024", "train_wall": "367", "gb_free": "6.5", "wall": "832694"} [2024-07-15 16:08:35,280][train_inner][INFO] - {"epoch": 2, "update": 1.95, "loss": "0.703", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "112.93", "wer_total": "126.905", "n_error": "13.975", "ppl": "1.63", "accuracy": "88.988", "wer": "11.012", "wps": "69.4", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "294000", "lr": "2.73509e-05", "gnorm": "1.989", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "833059"} [2024-07-15 16:14:40,538][train_inner][INFO] - {"epoch": 2, "update": 1.951, "loss": "0.718", "ntokens": "127.275", "acc_total": "127.275", "n_correct": "113.04", "wer_total": "127.275", "n_error": "14.235", "ppl": "1.64", "accuracy": "88.816", "wer": "11.184", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "294200", "lr": "2.7269e-05", "gnorm": "1.998", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "833425"} [2024-07-15 16:20:45,719][train_inner][INFO] - {"epoch": 2, "update": 1.952, "loss": "0.678", "ntokens": "127.345", "acc_total": "127.345", "n_correct": "113.565", "wer_total": "127.345", "n_error": "13.775", "ppl": "1.6", "accuracy": "89.179", "wer": "10.817", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "294400", "lr": "2.71875e-05", "gnorm": "2.004", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "833790"} [2024-07-15 16:26:50,602][train_inner][INFO] - {"epoch": 2, "update": 1.954, "loss": "0.691", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "112.755", "wer_total": "126.705", "n_error": "13.935", "ppl": "1.61", "accuracy": "88.99", "wer": "10.998", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "294600", "lr": "2.71061e-05", "gnorm": "1.911", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "834155"} [2024-07-15 16:32:55,980][train_inner][INFO] - {"epoch": 2, "update": 1.955, "loss": "0.694", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "113.715", "wer_total": "127.485", "n_error": "13.765", "ppl": "1.62", "accuracy": "89.199", "wer": "10.797", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "294800", "lr": "2.70251e-05", "gnorm": "1.937", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "834520"} [2024-07-15 16:39:01,155][train_inner][INFO] - {"epoch": 2, "update": 1.956, "loss": "0.718", "ntokens": "126.46", "acc_total": "126.46", "n_correct": "112.535", "wer_total": "126.46", "n_error": "13.925", "ppl": "1.64", "accuracy": "88.989", "wer": "11.011", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "295000", "lr": "2.69442e-05", "gnorm": "1.989", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "834885"} [2024-07-15 16:39:01,156][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 17:20:54,671][valid][INFO] - {"epoch": 2, "valid_loss": "0.611", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3303", "valid_wer_total": "18.1585", "valid_n_error": "1.82801", "valid_ppl": "1.53", "valid_accuracy": "89.932", "valid_wer": "10.067", "valid_wps": "173.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "295000", "valid_best_accuracy": "89.932"} [2024-07-15 17:20:54,671][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 295000 updates [2024-07-15 17:20:54,672][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_295000.pt [2024-07-15 17:20:57,902][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_295000.pt [2024-07-15 17:21:02,288][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_295000.pt (epoch 2 @ 295000 updates, score 89.932) (writing took 7.616068529896438 seconds) [2024-07-15 17:27:07,500][train_inner][INFO] - {"epoch": 2, "update": 1.958, "loss": "0.706", "ntokens": "127.55", "acc_total": "127.55", "n_correct": "113.39", "wer_total": "127.55", "n_error": "14.16", "ppl": "1.63", "accuracy": "88.898", "wer": "11.102", "wps": "8.8", "ups": "0.07", "wpb": "127.5", "bsz": "8", "num_updates": "295200", "lr": "2.68636e-05", "gnorm": "2.01", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "837772"} [2024-07-15 17:33:13,075][train_inner][INFO] - {"epoch": 2, "update": 1.959, "loss": "0.709", "ntokens": "127.925", "acc_total": "127.925", "n_correct": "113.78", "wer_total": "127.925", "n_error": "14.14", "ppl": "1.63", "accuracy": "88.943", "wer": "11.053", "wps": "70", "ups": "0.55", "wpb": "127.9", "bsz": "8", "num_updates": "295400", "lr": "2.67833e-05", "gnorm": "2.036", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "838137"} [2024-07-15 17:39:18,606][train_inner][INFO] - {"epoch": 2, "update": 1.96, "loss": "0.702", "ntokens": "127.71", "acc_total": "127.71", "n_correct": "113.49", "wer_total": "127.71", "n_error": "14.22", "ppl": "1.63", "accuracy": "88.865", "wer": "11.135", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "295600", "lr": "2.67032e-05", "gnorm": "2.079", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "838503"} [2024-07-15 17:45:23,950][train_inner][INFO] - {"epoch": 2, "update": 1.962, "loss": "0.696", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "113.155", "wer_total": "126.81", "n_error": "13.655", "ppl": "1.62", "accuracy": "89.232", "wer": "10.768", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "295800", "lr": "2.66233e-05", "gnorm": "2.028", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "838868"} [2024-07-15 17:51:29,427][train_inner][INFO] - {"epoch": 2, "update": 1.963, "loss": "0.653", "ntokens": "128.63", "acc_total": "128.63", "n_correct": "115.395", "wer_total": "128.63", "n_error": "13.235", "ppl": "1.57", "accuracy": "89.711", "wer": "10.289", "wps": "70.4", "ups": "0.55", "wpb": "128.6", "bsz": "8", "num_updates": "296000", "lr": "2.65436e-05", "gnorm": "1.901", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "839233"} [2024-07-15 17:57:34,767][train_inner][INFO] - {"epoch": 2, "update": 1.964, "loss": "0.681", "ntokens": "126.38", "acc_total": "126.38", "n_correct": "113.195", "wer_total": "126.38", "n_error": "13.185", "ppl": "1.6", "accuracy": "89.567", "wer": "10.433", "wps": "69.2", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "296200", "lr": "2.64642e-05", "gnorm": "1.971", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "839599"} [2024-07-15 18:03:39,579][train_inner][INFO] - {"epoch": 2, "update": 1.966, "loss": "0.704", "ntokens": "126.245", "acc_total": "126.245", "n_correct": "112.905", "wer_total": "126.245", "n_error": "13.34", "ppl": "1.63", "accuracy": "89.433", "wer": "10.567", "wps": "69.2", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "296400", "lr": "2.63851e-05", "gnorm": "1.987", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "839964"} [2024-07-15 18:09:44,507][train_inner][INFO] - {"epoch": 2, "update": 1.967, "loss": "0.714", "ntokens": "127.36", "acc_total": "127.36", "n_correct": "113.42", "wer_total": "127.36", "n_error": "13.935", "ppl": "1.64", "accuracy": "89.055", "wer": "10.941", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "296600", "lr": "2.63062e-05", "gnorm": "2.007", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "840329"} [2024-07-15 18:10:29,958][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 18:15:50,876][train_inner][INFO] - {"epoch": 2, "update": 1.968, "loss": "0.714", "ntokens": "126.06", "acc_total": "126.06", "n_correct": "111.96", "wer_total": "126.06", "n_error": "14.095", "ppl": "1.64", "accuracy": "88.815", "wer": "11.181", "wps": "68.8", "ups": "0.55", "wpb": "126.1", "bsz": "8", "num_updates": "296800", "lr": "2.62275e-05", "gnorm": "2.052", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "840695"} [2024-07-15 18:21:55,268][train_inner][INFO] - {"epoch": 2, "update": 1.97, "loss": "0.723", "ntokens": "126.325", "acc_total": "126.325", "n_correct": "112.09", "wer_total": "126.325", "n_error": "14.235", "ppl": "1.65", "accuracy": "88.731", "wer": "11.269", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "297000", "lr": "2.6149e-05", "gnorm": "2.119", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "841059"} [2024-07-15 18:27:59,943][train_inner][INFO] - {"epoch": 2, "update": 1.971, "loss": "0.717", "ntokens": "126.05", "acc_total": "126.05", "n_correct": "112.245", "wer_total": "126.05", "n_error": "13.805", "ppl": "1.64", "accuracy": "89.048", "wer": "10.952", "wps": "69.1", "ups": "0.55", "wpb": "126", "bsz": "8", "num_updates": "297200", "lr": "2.60708e-05", "gnorm": "2.13", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "841424"} [2024-07-15 18:34:04,323][train_inner][INFO] - {"epoch": 2, "update": 1.972, "loss": "0.692", "ntokens": "127.365", "acc_total": "127.365", "n_correct": "113.915", "wer_total": "127.365", "n_error": "13.45", "ppl": "1.62", "accuracy": "89.44", "wer": "10.56", "wps": "69.9", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "297400", "lr": "2.59928e-05", "gnorm": "2.143", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "841788"} [2024-07-15 18:37:06,695][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 19:18:51,890][valid][INFO] - {"epoch": 2, "valid_loss": "0.611", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.3324", "valid_wer_total": "18.1585", "valid_n_error": "1.82593", "valid_ppl": "1.53", "valid_accuracy": "89.944", "valid_wer": "10.056", "valid_wps": "173.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "297500", "valid_best_accuracy": "89.944"} [2024-07-15 19:18:51,891][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 297500 updates [2024-07-15 19:18:51,891][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_297500.pt [2024-07-15 19:18:55,161][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_297500.pt [2024-07-15 19:19:00,549][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_297500.pt (epoch 2 @ 297500 updates, score 89.944) (writing took 8.657731476938352 seconds) [2024-07-15 19:22:02,444][train_inner][INFO] - {"epoch": 2, "update": 1.974, "loss": "0.721", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "113.11", "wer_total": "127.405", "n_error": "14.29", "ppl": "1.65", "accuracy": "88.78", "wer": "11.216", "wps": "8.9", "ups": "0.07", "wpb": "127.4", "bsz": "8", "num_updates": "297600", "lr": "2.59151e-05", "gnorm": "2.011", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "844666"} [2024-07-15 19:28:07,044][train_inner][INFO] - {"epoch": 2, "update": 1.975, "loss": "0.683", "ntokens": "128.08", "acc_total": "128.08", "n_correct": "114.625", "wer_total": "128.08", "n_error": "13.455", "ppl": "1.61", "accuracy": "89.495", "wer": "10.505", "wps": "70.3", "ups": "0.55", "wpb": "128.1", "bsz": "8", "num_updates": "297800", "lr": "2.58376e-05", "gnorm": "2.06", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "845031"} [2024-07-15 19:34:11,323][train_inner][INFO] - {"epoch": 2, "update": 1.976, "loss": "0.669", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "113.19", "wer_total": "126.555", "n_error": "13.365", "ppl": "1.59", "accuracy": "89.439", "wer": "10.561", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "298000", "lr": "2.57603e-05", "gnorm": "1.934", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "845395"} [2024-07-15 19:40:15,285][train_inner][INFO] - {"epoch": 2, "update": 1.978, "loss": "0.707", "ntokens": "126.555", "acc_total": "126.555", "n_correct": "112.665", "wer_total": "126.555", "n_error": "13.89", "ppl": "1.63", "accuracy": "89.025", "wer": "10.975", "wps": "69.5", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "298200", "lr": "2.56832e-05", "gnorm": "1.969", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "845759"} [2024-07-15 19:46:19,255][train_inner][INFO] - {"epoch": 2, "update": 1.979, "loss": "0.72", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "112.54", "wer_total": "126.68", "n_error": "14.135", "ppl": "1.65", "accuracy": "88.838", "wer": "11.158", "wps": "69.6", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "298400", "lr": "2.56064e-05", "gnorm": "1.986", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "846123"} [2024-07-15 19:52:23,261][train_inner][INFO] - {"epoch": 2, "update": 1.98, "loss": "0.678", "ntokens": "127.455", "acc_total": "127.455", "n_correct": "113.685", "wer_total": "127.455", "n_error": "13.765", "ppl": "1.6", "accuracy": "89.196", "wer": "10.8", "wps": "70", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "298600", "lr": "2.55298e-05", "gnorm": "1.957", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "846487"} [2024-07-15 19:58:27,275][train_inner][INFO] - {"epoch": 2, "update": 1.981, "loss": "0.68", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "112.595", "wer_total": "126.21", "n_error": "13.615", "ppl": "1.6", "accuracy": "89.212", "wer": "10.788", "wps": "69.3", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "298800", "lr": "2.54534e-05", "gnorm": "1.928", "loss_scale": "2048", "train_wall": "363", "gb_free": "6.5", "wall": "846851"} [2024-07-15 20:04:25,965][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0 [2024-07-15 20:04:33,260][train_inner][INFO] - {"epoch": 2, "update": 1.983, "loss": "0.691", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "113.01", "wer_total": "126.9", "n_error": "13.89", "ppl": "1.61", "accuracy": "89.054", "wer": "10.946", "wps": "69.3", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "299000", "lr": "2.53773e-05", "gnorm": "2.007", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "847217"} [2024-07-15 20:10:37,508][train_inner][INFO] - {"epoch": 2, "update": 1.984, "loss": "0.71", "ntokens": "126.43", "acc_total": "126.43", "n_correct": "112.635", "wer_total": "126.43", "n_error": "13.795", "ppl": "1.64", "accuracy": "89.089", "wer": "10.911", "wps": "69.4", "ups": "0.55", "wpb": "126.4", "bsz": "8", "num_updates": "299200", "lr": "2.53014e-05", "gnorm": "1.9", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "847582"} [2024-07-15 20:16:41,631][train_inner][INFO] - {"epoch": 2, "update": 1.985, "loss": "0.669", "ntokens": "126.905", "acc_total": "126.905", "n_correct": "113.45", "wer_total": "126.905", "n_error": "13.455", "ppl": "1.59", "accuracy": "89.398", "wer": "10.602", "wps": "69.7", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "299400", "lr": "2.52257e-05", "gnorm": "1.963", "loss_scale": "1024", "train_wall": "363", "gb_free": "6.5", "wall": "847946"} [2024-07-15 20:22:46,021][train_inner][INFO] - {"epoch": 2, "update": 1.987, "loss": "0.657", "ntokens": "126.26", "acc_total": "126.26", "n_correct": "112.685", "wer_total": "126.26", "n_error": "13.57", "ppl": "1.58", "accuracy": "89.248", "wer": "10.748", "wps": "69.3", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "299600", "lr": "2.51502e-05", "gnorm": "1.938", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "848310"} [2024-07-15 20:28:50,322][train_inner][INFO] - {"epoch": 2, "update": 1.988, "loss": "0.669", "ntokens": "127.25", "acc_total": "127.25", "n_correct": "113.675", "wer_total": "127.25", "n_error": "13.57", "ppl": "1.59", "accuracy": "89.332", "wer": "10.664", "wps": "69.9", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "299800", "lr": "2.5075e-05", "gnorm": "1.967", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "848674"} [2024-07-15 20:34:54,645][train_inner][INFO] - {"epoch": 2, "update": 1.989, "loss": "0.687", "ntokens": "127.84", "acc_total": "127.84", "n_correct": "114.025", "wer_total": "127.84", "n_error": "13.81", "ppl": "1.61", "accuracy": "89.194", "wer": "10.803", "wps": "70.2", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "300000", "lr": "2.5e-05", "gnorm": "1.968", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "849039"} [2024-07-15 20:34:54,645][fairseq_cli.train][INFO] - Stopping training due to num_updates: 300000 >= max_update: 300000 [2024-07-15 20:34:54,645][fairseq_cli.train][INFO] - begin validation on "valid" subset [2024-07-15 21:16:40,029][valid][INFO] - {"epoch": 2, "valid_loss": "0.609", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "16.336", "valid_wer_total": "18.1585", "valid_n_error": "1.82238", "valid_ppl": "1.53", "valid_accuracy": "89.963", "valid_wer": "10.036", "valid_wps": "173.9", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "300000", "valid_best_accuracy": "89.963"} [2024-07-15 21:16:40,029][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 300000 updates [2024-07-15 21:16:40,030][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_300000.pt [2024-07-15 21:16:43,240][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_300000.pt [2024-07-15 21:16:48,632][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_300000.pt (epoch 2 @ 300000 updates, score 89.963) (writing took 8.60233649914153 seconds) [2024-07-15 21:16:48,671][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below) [2024-07-15 21:16:48,673][train][INFO] - {"epoch": 2, "train_loss": "0.796", "train_ntokens": "126.889", "train_acc_total": "126.889", "train_n_correct": "111.502", "train_wer_total": "126.889", "train_n_error": "15.3837", "train_ppl": "1.74", "train_accuracy": "87.874", "train_wer": "12.124", "train_wps": "44.8", "train_ups": "0.35", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "300000", "train_lr": "2.5e-05", "train_gnorm": "2.371", "train_loss_scale": "1024", "train_train_wall": "271384", "train_gb_free": "6.5", "train_wall": "851553"} [2024-07-15 21:16:48,673][fairseq_cli.train][INFO] - done training in 851549.4 seconds