{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.009216377502822516, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001843275500564503, "eval_loss": 10.371973037719727, "eval_runtime": 19.4745, "eval_samples_per_second": 469.178, "eval_steps_per_second": 117.333, "step": 1 }, { "epoch": 0.0005529826501693509, "grad_norm": 0.07208254933357239, "learning_rate": 3e-05, "loss": 10.3716, "step": 3 }, { "epoch": 0.0009216377502822516, "eval_loss": 10.371664047241211, "eval_runtime": 19.302, "eval_samples_per_second": 473.371, "eval_steps_per_second": 118.382, "step": 5 }, { "epoch": 0.0011059653003387018, "grad_norm": 0.06621870398521423, "learning_rate": 6e-05, "loss": 10.3702, "step": 6 }, { "epoch": 0.0016589479505080527, "grad_norm": 0.08342023938894272, "learning_rate": 9e-05, "loss": 10.3734, "step": 9 }, { "epoch": 0.0018432755005645032, "eval_loss": 10.370595932006836, "eval_runtime": 19.265, "eval_samples_per_second": 474.28, "eval_steps_per_second": 118.609, "step": 10 }, { "epoch": 0.0022119306006774036, "grad_norm": 0.06515509635210037, "learning_rate": 9.938441702975689e-05, "loss": 10.3728, "step": 12 }, { "epoch": 0.0027649132508467547, "grad_norm": 0.07225878536701202, "learning_rate": 9.619397662556435e-05, "loss": 10.3706, "step": 15 }, { "epoch": 0.0027649132508467547, "eval_loss": 10.369028091430664, "eval_runtime": 19.6179, "eval_samples_per_second": 465.748, "eval_steps_per_second": 116.475, "step": 15 }, { "epoch": 0.0033178959010161054, "grad_norm": 0.07449828833341599, "learning_rate": 9.045084971874738e-05, "loss": 10.3691, "step": 18 }, { "epoch": 0.0036865510011290063, "eval_loss": 10.36725902557373, "eval_runtime": 19.3786, "eval_samples_per_second": 471.5, "eval_steps_per_second": 117.914, "step": 20 }, { "epoch": 0.0038708785511854566, "grad_norm": 0.08717207610607147, "learning_rate": 8.247240241650918e-05, "loss": 10.3667, "step": 21 }, { "epoch": 0.004423861201354807, "grad_norm": 0.12931972742080688, "learning_rate": 7.269952498697734e-05, "loss": 10.3669, "step": 24 }, { "epoch": 0.004608188751411258, "eval_loss": 10.365208625793457, "eval_runtime": 19.5098, "eval_samples_per_second": 468.329, "eval_steps_per_second": 117.121, "step": 25 }, { "epoch": 0.004976843851524158, "grad_norm": 0.11614447087049484, "learning_rate": 6.167226819279528e-05, "loss": 10.3667, "step": 27 }, { "epoch": 0.0055298265016935095, "grad_norm": 0.13167273998260498, "learning_rate": 5e-05, "loss": 10.3669, "step": 30 }, { "epoch": 0.0055298265016935095, "eval_loss": 10.363195419311523, "eval_runtime": 19.4376, "eval_samples_per_second": 470.068, "eval_steps_per_second": 117.556, "step": 30 }, { "epoch": 0.006082809151862861, "grad_norm": 0.1491217315196991, "learning_rate": 3.832773180720475e-05, "loss": 10.3614, "step": 33 }, { "epoch": 0.006451464251975761, "eval_loss": 10.361522674560547, "eval_runtime": 19.5391, "eval_samples_per_second": 467.628, "eval_steps_per_second": 116.945, "step": 35 }, { "epoch": 0.006635791802032211, "grad_norm": 0.14466819167137146, "learning_rate": 2.7300475013022663e-05, "loss": 10.364, "step": 36 }, { "epoch": 0.007188774452201562, "grad_norm": 0.17304567992687225, "learning_rate": 1.7527597583490822e-05, "loss": 10.3644, "step": 39 }, { "epoch": 0.007373102002258013, "eval_loss": 10.36044979095459, "eval_runtime": 19.359, "eval_samples_per_second": 471.976, "eval_steps_per_second": 118.033, "step": 40 }, { "epoch": 0.007741757102370913, "grad_norm": 0.14071166515350342, "learning_rate": 9.549150281252633e-06, "loss": 10.3592, "step": 42 }, { "epoch": 0.008294739752540263, "grad_norm": 0.18590952455997467, "learning_rate": 3.8060233744356633e-06, "loss": 10.3607, "step": 45 }, { "epoch": 0.008294739752540263, "eval_loss": 10.360003471374512, "eval_runtime": 19.526, "eval_samples_per_second": 467.94, "eval_steps_per_second": 117.023, "step": 45 }, { "epoch": 0.008847722402709614, "grad_norm": 0.1559176743030548, "learning_rate": 6.15582970243117e-07, "loss": 10.3592, "step": 48 }, { "epoch": 0.009216377502822516, "eval_loss": 10.359918594360352, "eval_runtime": 19.3944, "eval_samples_per_second": 471.115, "eval_steps_per_second": 117.818, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5577533030400.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }