{ "best_metric": 0.10391458868980408, "best_model_checkpoint": "vit-large-ai-or-not/checkpoint-2000", "epoch": 2.0, "eval_steps": 200, "global_step": 2212, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18083182640144665, "grad_norm": 1.507786750793457, "learning_rate": 0.00018191681735985535, "loss": 0.3925, "step": 200 }, { "epoch": 0.18083182640144665, "eval_accuracy": 0.878625134264232, "eval_loss": 0.40452271699905396, "eval_runtime": 14.0535, "eval_samples_per_second": 66.247, "eval_steps_per_second": 8.325, "step": 200 }, { "epoch": 0.3616636528028933, "grad_norm": 3.166074514389038, "learning_rate": 0.00016383363471971068, "loss": 0.2803, "step": 400 }, { "epoch": 0.3616636528028933, "eval_accuracy": 0.9044038668098818, "eval_loss": 0.2385927140712738, "eval_runtime": 13.6284, "eval_samples_per_second": 68.313, "eval_steps_per_second": 8.585, "step": 400 }, { "epoch": 0.5424954792043399, "grad_norm": 3.2524709701538086, "learning_rate": 0.00014575045207956602, "loss": 0.2235, "step": 600 }, { "epoch": 0.5424954792043399, "eval_accuracy": 0.9172932330827067, "eval_loss": 0.1893242746591568, "eval_runtime": 13.8787, "eval_samples_per_second": 67.081, "eval_steps_per_second": 8.43, "step": 600 }, { "epoch": 0.7233273056057866, "grad_norm": 0.6501625776290894, "learning_rate": 0.00012766726943942136, "loss": 0.217, "step": 800 }, { "epoch": 0.7233273056057866, "eval_accuracy": 0.9398496240601504, "eval_loss": 0.15967358648777008, "eval_runtime": 15.6768, "eval_samples_per_second": 59.387, "eval_steps_per_second": 7.463, "step": 800 }, { "epoch": 0.9041591320072333, "grad_norm": 0.8514215350151062, "learning_rate": 0.00010958408679927667, "loss": 0.1865, "step": 1000 }, { "epoch": 0.9041591320072333, "eval_accuracy": 0.9419978517722879, "eval_loss": 0.14128026366233826, "eval_runtime": 14.4065, "eval_samples_per_second": 64.624, "eval_steps_per_second": 8.121, "step": 1000 }, { "epoch": 1.0849909584086799, "grad_norm": 0.2293129414319992, "learning_rate": 9.150090415913202e-05, "loss": 0.1309, "step": 1200 }, { "epoch": 1.0849909584086799, "eval_accuracy": 0.9516648764769066, "eval_loss": 0.1473875343799591, "eval_runtime": 14.0786, "eval_samples_per_second": 66.129, "eval_steps_per_second": 8.31, "step": 1200 }, { "epoch": 1.2658227848101267, "grad_norm": 4.756102561950684, "learning_rate": 7.341772151898734e-05, "loss": 0.1008, "step": 1400 }, { "epoch": 1.2658227848101267, "eval_accuracy": 0.9419978517722879, "eval_loss": 0.19141799211502075, "eval_runtime": 13.9995, "eval_samples_per_second": 66.502, "eval_steps_per_second": 8.357, "step": 1400 }, { "epoch": 1.4466546112115732, "grad_norm": 0.034902941435575485, "learning_rate": 5.533453887884268e-05, "loss": 0.0793, "step": 1600 }, { "epoch": 1.4466546112115732, "eval_accuracy": 0.9441460794844253, "eval_loss": 0.15568311512470245, "eval_runtime": 14.1017, "eval_samples_per_second": 66.021, "eval_steps_per_second": 8.297, "step": 1600 }, { "epoch": 1.6274864376130198, "grad_norm": 1.2290639877319336, "learning_rate": 3.725135623869802e-05, "loss": 0.0804, "step": 1800 }, { "epoch": 1.6274864376130198, "eval_accuracy": 0.9312567132116004, "eval_loss": 0.23011024296283722, "eval_runtime": 13.8445, "eval_samples_per_second": 67.247, "eval_steps_per_second": 8.451, "step": 1800 }, { "epoch": 1.8083182640144666, "grad_norm": 0.08316856622695923, "learning_rate": 1.9168173598553345e-05, "loss": 0.0814, "step": 2000 }, { "epoch": 1.8083182640144666, "eval_accuracy": 0.958109559613319, "eval_loss": 0.10391458868980408, "eval_runtime": 14.1201, "eval_samples_per_second": 65.935, "eval_steps_per_second": 8.286, "step": 2000 }, { "epoch": 1.9891500904159132, "grad_norm": 0.11144193261861801, "learning_rate": 1.08499095840868e-06, "loss": 0.0446, "step": 2200 }, { "epoch": 1.9891500904159132, "eval_accuracy": 0.9634801288936627, "eval_loss": 0.1123916506767273, "eval_runtime": 14.6416, "eval_samples_per_second": 63.586, "eval_steps_per_second": 7.991, "step": 2200 }, { "epoch": 2.0, "step": 2212, "total_flos": 9.690147678529511e+18, "train_loss": 0.16445749211268226, "train_runtime": 2512.7977, "train_samples_per_second": 14.078, "train_steps_per_second": 0.88 } ], "logging_steps": 200, "max_steps": 2212, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.690147678529511e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }