{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9988649262202043, "eval_steps": 6, "global_step": 110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05448354143019296, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.062, "step": 6 }, { "epoch": 0.10896708286038592, "grad_norm": 0.2376125305891037, "learning_rate": 0.001, "loss": 1.0224, "step": 12 }, { "epoch": 0.16345062429057888, "grad_norm": 0.30375346541404724, "learning_rate": 0.0016329931618554523, "loss": 1.009, "step": 18 }, { "epoch": 0.21793416572077184, "grad_norm": 0.2895285189151764, "learning_rate": 0.0009428090415820635, "loss": 0.8092, "step": 24 }, { "epoch": 0.2724177071509648, "grad_norm": 0.22418786585330963, "learning_rate": 0.0007559289460184544, "loss": 0.8055, "step": 30 }, { "epoch": 0.32690124858115777, "grad_norm": 0.21890012919902802, "learning_rate": 0.0006324555320336759, "loss": 0.7342, "step": 36 }, { "epoch": 0.3813847900113507, "grad_norm": 0.20965541899204254, "learning_rate": 0.0005547001962252292, "loss": 0.7073, "step": 42 }, { "epoch": 0.4358683314415437, "grad_norm": 0.23984524607658386, "learning_rate": 0.0005, "loss": 0.6812, "step": 48 }, { "epoch": 0.49035187287173665, "grad_norm": 0.2955872416496277, "learning_rate": 0.0004588314677411235, "loss": 0.699, "step": 54 }, { "epoch": 0.5448354143019296, "grad_norm": 0.1915532946586609, "learning_rate": 0.00042640143271122083, "loss": 0.6867, "step": 60 }, { "epoch": 0.5993189557321226, "grad_norm": 0.19135332107543945, "learning_rate": 0.0004, "loss": 0.6576, "step": 66 }, { "epoch": 0.6538024971623155, "grad_norm": 0.21214482188224792, "learning_rate": 0.0003779644730092272, "loss": 0.6421, "step": 72 }, { "epoch": 0.7082860385925085, "grad_norm": 0.1696617305278778, "learning_rate": 0.00035921060405354985, "loss": 0.6718, "step": 78 }, { "epoch": 0.7627695800227015, "grad_norm": 0.18742182850837708, "learning_rate": 0.00034299717028501764, "loss": 0.6636, "step": 84 }, { "epoch": 0.8172531214528944, "grad_norm": 0.19666869938373566, "learning_rate": 0.0003287979746107146, "loss": 0.6558, "step": 90 }, { "epoch": 0.8717366628830874, "grad_norm": 0.1772751361131668, "learning_rate": 0.00031622776601683794, "loss": 0.6604, "step": 96 }, { "epoch": 0.9262202043132803, "grad_norm": 0.23768457770347595, "learning_rate": 0.00030499714066520935, "loss": 0.6501, "step": 102 }, { "epoch": 0.9807037457434733, "grad_norm": 0.17372557520866394, "learning_rate": 0.0002948839123097943, "loss": 0.6645, "step": 108 }, { "epoch": 0.9988649262202043, "step": 110, "total_flos": 1.1172110162041242e+18, "train_loss": 0.7473254984075373, "train_runtime": 984.6579, "train_samples_per_second": 14.314, "train_steps_per_second": 0.112 } ], "logging_steps": 6, "max_steps": 110, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1172110162041242e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }