{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998282770463652, "eval_steps": 6, "global_step": 109, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05495134516313681, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.2633, "step": 6 }, { "epoch": 0.10990269032627362, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.2511, "step": 12 }, { "epoch": 0.1648540354894104, "grad_norm": 0.5077374577522278, "learning_rate": 0.0016329931618554523, "loss": 1.2743, "step": 18 }, { "epoch": 0.21980538065254723, "grad_norm": 0.4308764338493347, "learning_rate": 0.001, "loss": 1.2678, "step": 24 }, { "epoch": 0.27475672581568406, "grad_norm": 0.3473643958568573, "learning_rate": 0.0007559289460184544, "loss": 0.9491, "step": 30 }, { "epoch": 0.3297080709788208, "grad_norm": 0.3959534466266632, "learning_rate": 0.0006324555320336759, "loss": 0.8782, "step": 36 }, { "epoch": 0.38465941614195764, "grad_norm": 0.3642798960208893, "learning_rate": 0.0005547001962252292, "loss": 0.8816, "step": 42 }, { "epoch": 0.43961076130509447, "grad_norm": 0.3345658481121063, "learning_rate": 0.0005, "loss": 0.8279, "step": 48 }, { "epoch": 0.49456210646823123, "grad_norm": 0.31645363569259644, "learning_rate": 0.0004588314677411235, "loss": 0.8249, "step": 54 }, { "epoch": 0.5495134516313681, "grad_norm": 0.29738476872444153, "learning_rate": 0.00042640143271122083, "loss": 0.8297, "step": 60 }, { "epoch": 0.6044647967945048, "grad_norm": 0.3066190183162689, "learning_rate": 0.0004, "loss": 0.815, "step": 66 }, { "epoch": 0.6594161419576416, "grad_norm": 0.3899874985218048, "learning_rate": 0.0003779644730092272, "loss": 0.8002, "step": 72 }, { "epoch": 0.7143674871207785, "grad_norm": 0.32948872447013855, "learning_rate": 0.00035921060405354985, "loss": 0.7834, "step": 78 }, { "epoch": 0.7693188322839153, "grad_norm": 0.3329070508480072, "learning_rate": 0.00034299717028501764, "loss": 0.7763, "step": 84 }, { "epoch": 0.8242701774470521, "grad_norm": 0.3928099274635315, "learning_rate": 0.0003287979746107146, "loss": 0.7737, "step": 90 }, { "epoch": 0.8792215226101889, "grad_norm": 0.262188583612442, "learning_rate": 0.00031622776601683794, "loss": 0.777, "step": 96 }, { "epoch": 0.9341728677733258, "grad_norm": 0.2854004502296448, "learning_rate": 0.00030499714066520935, "loss": 0.7828, "step": 102 }, { "epoch": 0.9891242129364625, "grad_norm": 0.2840617895126343, "learning_rate": 0.0002948839123097943, "loss": 0.7732, "step": 108 }, { "epoch": 0.998282770463652, "step": 109, "total_flos": 1.1070545524204503e+18, "train_loss": 0.9170126422829584, "train_runtime": 980.4859, "train_samples_per_second": 14.252, "train_steps_per_second": 0.111 } ], "logging_steps": 6, "max_steps": 109, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1070545524204503e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }