{ "best_metric": 4.043909549713135, "best_model_checkpoint": "vit5-base_vi/checkpoint-200", "epoch": 1.5021459227467813, "eval_steps": 50, "global_step": 350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 0.0002948497854077253, "loss": 2.5915, "step": 20 }, { "epoch": 0.17, "learning_rate": 0.0002896995708154506, "loss": 1.9115, "step": 40 }, { "epoch": 0.21, "eval_loss": 4.051170349121094, "eval_runtime": 8.1419, "eval_samples_per_second": 280.646, "eval_steps_per_second": 3.562, "step": 50 }, { "epoch": 0.26, "learning_rate": 0.00028454935622317595, "loss": 1.8611, "step": 60 }, { "epoch": 0.34, "learning_rate": 0.00027939914163090123, "loss": 1.8184, "step": 80 }, { "epoch": 0.43, "learning_rate": 0.0002742489270386266, "loss": 1.783, "step": 100 }, { "epoch": 0.43, "eval_loss": 4.106208801269531, "eval_runtime": 8.1079, "eval_samples_per_second": 281.824, "eval_steps_per_second": 3.577, "step": 100 }, { "epoch": 0.52, "learning_rate": 0.0002690987124463519, "loss": 1.7446, "step": 120 }, { "epoch": 0.6, "learning_rate": 0.00026394849785407726, "loss": 1.7667, "step": 140 }, { "epoch": 0.64, "eval_loss": 4.075512886047363, "eval_runtime": 8.0983, "eval_samples_per_second": 282.159, "eval_steps_per_second": 3.581, "step": 150 }, { "epoch": 0.69, "learning_rate": 0.00025879828326180255, "loss": 1.7709, "step": 160 }, { "epoch": 0.77, "learning_rate": 0.0002536480686695279, "loss": 1.7525, "step": 180 }, { "epoch": 0.86, "learning_rate": 0.0002484978540772532, "loss": 1.734, "step": 200 }, { "epoch": 0.86, "eval_loss": 4.043909549713135, "eval_runtime": 8.089, "eval_samples_per_second": 282.482, "eval_steps_per_second": 3.585, "step": 200 }, { "epoch": 0.94, "learning_rate": 0.00024334763948497852, "loss": 1.6832, "step": 220 }, { "epoch": 1.03, "learning_rate": 0.00023819742489270384, "loss": 1.5453, "step": 240 }, { "epoch": 1.07, "eval_loss": 4.2676682472229, "eval_runtime": 8.1069, "eval_samples_per_second": 281.859, "eval_steps_per_second": 3.577, "step": 250 }, { "epoch": 1.12, "learning_rate": 0.00023304721030042918, "loss": 1.2799, "step": 260 }, { "epoch": 1.2, "learning_rate": 0.0002278969957081545, "loss": 1.2939, "step": 280 }, { "epoch": 1.29, "learning_rate": 0.0002227467811158798, "loss": 1.3032, "step": 300 }, { "epoch": 1.29, "eval_loss": 4.165661334991455, "eval_runtime": 8.1052, "eval_samples_per_second": 281.916, "eval_steps_per_second": 3.578, "step": 300 }, { "epoch": 1.37, "learning_rate": 0.00021759656652360512, "loss": 1.3073, "step": 320 }, { "epoch": 1.46, "learning_rate": 0.00021244635193133044, "loss": 1.3146, "step": 340 }, { "epoch": 1.5, "eval_loss": 4.157186508178711, "eval_runtime": 8.1161, "eval_samples_per_second": 281.54, "eval_steps_per_second": 3.573, "step": 350 } ], "logging_steps": 20, "max_steps": 1165, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "total_flos": 1.705082034847744e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }