{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03752697251149264, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00018763486255746317, "eval_loss": 0.9478870630264282, "eval_runtime": 246.4229, "eval_samples_per_second": 9.106, "eval_steps_per_second": 4.553, "step": 1 }, { "epoch": 0.0018763486255746317, "grad_norm": 1.1852015256881714, "learning_rate": 0.00019967573081342103, "loss": 3.5285, "step": 10 }, { "epoch": 0.0037526972511492634, "grad_norm": 1.0713735818862915, "learning_rate": 0.0001970941817426052, "loss": 2.3104, "step": 20 }, { "epoch": 0.005629045876723895, "grad_norm": 1.1629306077957153, "learning_rate": 0.00019199794436588243, "loss": 2.0838, "step": 30 }, { "epoch": 0.007505394502298527, "grad_norm": 0.9892584681510925, "learning_rate": 0.0001845190085543795, "loss": 2.2854, "step": 40 }, { "epoch": 0.00938174312787316, "grad_norm": 6.3884663581848145, "learning_rate": 0.00017485107481711012, "loss": 1.9859, "step": 50 }, { "epoch": 0.00938174312787316, "eval_loss": 0.5233616232872009, "eval_runtime": 246.463, "eval_samples_per_second": 9.105, "eval_steps_per_second": 4.552, "step": 50 }, { "epoch": 0.01125809175344779, "grad_norm": 0.802453875541687, "learning_rate": 0.00016324453755953773, "loss": 2.1749, "step": 60 }, { "epoch": 0.013134440379022422, "grad_norm": 1.9824305772781372, "learning_rate": 0.00015000000000000001, "loss": 2.3033, "step": 70 }, { "epoch": 0.015010789004597054, "grad_norm": 1.2942346334457397, "learning_rate": 0.00013546048870425356, "loss": 2.1344, "step": 80 }, { "epoch": 0.016887137630171686, "grad_norm": 0.9966654777526855, "learning_rate": 0.00012000256937760445, "loss": 2.0934, "step": 90 }, { "epoch": 0.01876348625574632, "grad_norm": 0.9738155603408813, "learning_rate": 0.00010402659401094152, "loss": 1.9877, "step": 100 }, { "epoch": 0.01876348625574632, "eval_loss": 0.5045682787895203, "eval_runtime": 247.6377, "eval_samples_per_second": 9.062, "eval_steps_per_second": 4.531, "step": 100 }, { "epoch": 0.02063983488132095, "grad_norm": 0.9715750217437744, "learning_rate": 8.79463319744677e-05, "loss": 2.1053, "step": 110 }, { "epoch": 0.02251618350689558, "grad_norm": 0.8509733080863953, "learning_rate": 7.217825360835473e-05, "loss": 1.8666, "step": 120 }, { "epoch": 0.024392532132470213, "grad_norm": 1.073076844215393, "learning_rate": 5.713074385969457e-05, "loss": 1.9373, "step": 130 }, { "epoch": 0.026268880758044844, "grad_norm": 1.0255498886108398, "learning_rate": 4.3193525326884435e-05, "loss": 1.9798, "step": 140 }, { "epoch": 0.028145229383619477, "grad_norm": 0.932864785194397, "learning_rate": 3.072756464904006e-05, "loss": 2.0326, "step": 150 }, { "epoch": 0.028145229383619477, "eval_loss": 0.4930672347545624, "eval_runtime": 247.1383, "eval_samples_per_second": 9.08, "eval_steps_per_second": 4.54, "step": 150 }, { "epoch": 0.030021578009194107, "grad_norm": 0.8730648756027222, "learning_rate": 2.0055723659649904e-05, "loss": 1.9601, "step": 160 }, { "epoch": 0.03189792663476874, "grad_norm": 1.2730159759521484, "learning_rate": 1.1454397434679021e-05, "loss": 2.2116, "step": 170 }, { "epoch": 0.03377427526034337, "grad_norm": 0.9545321464538574, "learning_rate": 5.146355805285452e-06, "loss": 1.8604, "step": 180 }, { "epoch": 0.035650623885918005, "grad_norm": 0.9635279178619385, "learning_rate": 1.2949737362087156e-06, "loss": 1.9259, "step": 190 }, { "epoch": 0.03752697251149264, "grad_norm": 1.0731476545333862, "learning_rate": 0.0, "loss": 2.2284, "step": 200 }, { "epoch": 0.03752697251149264, "eval_loss": 0.49111470580101013, "eval_runtime": 247.0242, "eval_samples_per_second": 9.084, "eval_steps_per_second": 4.542, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.298643675512832e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }