{ "best_metric": 0.3, "best_model_checkpoint": "videomae-base-finetuned-kinetics-finetuned-caer-subset-5-classes\\checkpoint-250", "epoch": 1.444, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 11.369068145751953, "learning_rate": 2e-05, "loss": 1.5655, "step": 10 }, { "epoch": 0.08, "grad_norm": 11.059978485107422, "learning_rate": 4e-05, "loss": 1.6109, "step": 20 }, { "epoch": 0.12, "grad_norm": 12.424741744995117, "learning_rate": 4.888888888888889e-05, "loss": 1.6169, "step": 30 }, { "epoch": 0.16, "grad_norm": 15.846301078796387, "learning_rate": 4.666666666666667e-05, "loss": 1.6663, "step": 40 }, { "epoch": 0.2, "grad_norm": 6.054256439208984, "learning_rate": 4.4444444444444447e-05, "loss": 1.7779, "step": 50 }, { "epoch": 0.24, "grad_norm": 7.3695454597473145, "learning_rate": 4.222222222222222e-05, "loss": 1.6658, "step": 60 }, { "epoch": 0.28, "grad_norm": 7.011582851409912, "learning_rate": 4e-05, "loss": 1.6601, "step": 70 }, { "epoch": 0.32, "grad_norm": 7.409139633178711, "learning_rate": 3.777777777777778e-05, "loss": 1.6056, "step": 80 }, { "epoch": 0.36, "grad_norm": 9.646027565002441, "learning_rate": 3.555555555555556e-05, "loss": 1.7752, "step": 90 }, { "epoch": 0.4, "grad_norm": 8.108616828918457, "learning_rate": 3.3333333333333335e-05, "loss": 1.6162, "step": 100 }, { "epoch": 0.44, "grad_norm": 4.2093586921691895, "learning_rate": 3.111111111111111e-05, "loss": 1.6689, "step": 110 }, { "epoch": 0.48, "grad_norm": 4.784428596496582, "learning_rate": 2.8888888888888888e-05, "loss": 1.5406, "step": 120 }, { "epoch": 0.52, "grad_norm": 5.809035301208496, "learning_rate": 2.6666666666666667e-05, "loss": 1.6206, "step": 130 }, { "epoch": 0.56, "eval_accuracy": 0.2, "eval_loss": 1.577626347541809, "eval_runtime": 10.5956, "eval_samples_per_second": 2.831, "eval_steps_per_second": 1.416, "step": 139 }, { "epoch": 1.0, "grad_norm": 6.738602638244629, "learning_rate": 2.4444444444444445e-05, "loss": 1.4571, "step": 140 }, { "epoch": 1.04, "grad_norm": 6.5725579261779785, "learning_rate": 2.2222222222222223e-05, "loss": 1.4523, "step": 150 }, { "epoch": 1.08, "grad_norm": 9.774547576904297, "learning_rate": 2e-05, "loss": 1.4379, "step": 160 }, { "epoch": 1.12, "grad_norm": 7.136582374572754, "learning_rate": 1.777777777777778e-05, "loss": 1.3654, "step": 170 }, { "epoch": 1.16, "grad_norm": 10.755990028381348, "learning_rate": 1.5555555555555555e-05, "loss": 1.6211, "step": 180 }, { "epoch": 1.2, "grad_norm": 9.731955528259277, "learning_rate": 1.3333333333333333e-05, "loss": 1.4025, "step": 190 }, { "epoch": 1.24, "grad_norm": 9.016931533813477, "learning_rate": 1.1111111111111112e-05, "loss": 1.4272, "step": 200 }, { "epoch": 1.28, "grad_norm": 8.873564720153809, "learning_rate": 8.88888888888889e-06, "loss": 1.352, "step": 210 }, { "epoch": 1.32, "grad_norm": 10.840998649597168, "learning_rate": 6.666666666666667e-06, "loss": 1.3692, "step": 220 }, { "epoch": 1.36, "grad_norm": 6.855379581451416, "learning_rate": 4.444444444444445e-06, "loss": 1.3788, "step": 230 }, { "epoch": 1.4, "grad_norm": 10.270050048828125, "learning_rate": 2.2222222222222225e-06, "loss": 1.4626, "step": 240 }, { "epoch": 1.44, "grad_norm": 11.716329574584961, "learning_rate": 0.0, "loss": 1.315, "step": 250 }, { "epoch": 1.44, "eval_accuracy": 0.3, "eval_loss": 1.531712293624878, "eval_runtime": 9.5018, "eval_samples_per_second": 3.157, "eval_steps_per_second": 1.579, "step": 250 }, { "epoch": 1.44, "step": 250, "total_flos": 6.2304896360448e+17, "train_loss": 1.5372729606628417, "train_runtime": 255.4235, "train_samples_per_second": 1.958, "train_steps_per_second": 0.979 }, { "epoch": 1.44, "eval_accuracy": 0.2191780821917808, "eval_loss": 1.581635594367981, "eval_runtime": 22.9267, "eval_samples_per_second": 3.184, "eval_steps_per_second": 1.614, "step": 250 }, { "epoch": 1.44, "eval_accuracy": 0.2191780821917808, "eval_loss": 1.5816354751586914, "eval_runtime": 22.2614, "eval_samples_per_second": 3.279, "eval_steps_per_second": 1.662, "step": 250 } ], "logging_steps": 10, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 6.2304896360448e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }