{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.015601841017240034, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.800920508620017e-05, "eval_loss": 2.6932764053344727, "eval_runtime": 161.6543, "eval_samples_per_second": 33.392, "eval_steps_per_second": 16.696, "step": 1 }, { "epoch": 0.0007800920508620017, "grad_norm": 1.6918610334396362, "learning_rate": 0.0002, "loss": 1.6264, "step": 10 }, { "epoch": 0.0015601841017240034, "grad_norm": 0.8769293427467346, "learning_rate": 0.0002, "loss": 0.9108, "step": 20 }, { "epoch": 0.002340276152586005, "grad_norm": 0.8490325808525085, "learning_rate": 0.0002, "loss": 0.8986, "step": 30 }, { "epoch": 0.003120368203448007, "grad_norm": 0.7544106841087341, "learning_rate": 0.0002, "loss": 0.837, "step": 40 }, { "epoch": 0.0039004602543100085, "grad_norm": 0.8223780989646912, "learning_rate": 0.0002, "loss": 0.9035, "step": 50 }, { "epoch": 0.0039004602543100085, "eval_loss": 0.8136085271835327, "eval_runtime": 160.6948, "eval_samples_per_second": 33.592, "eval_steps_per_second": 16.796, "step": 50 }, { "epoch": 0.00468055230517201, "grad_norm": 1.2069497108459473, "learning_rate": 0.0002, "loss": 0.7518, "step": 60 }, { "epoch": 0.005460644356034012, "grad_norm": 0.8096679449081421, "learning_rate": 0.0002, "loss": 0.8775, "step": 70 }, { "epoch": 0.006240736406896014, "grad_norm": 0.6722874045372009, "learning_rate": 0.0002, "loss": 0.8393, "step": 80 }, { "epoch": 0.007020828457758015, "grad_norm": 0.7288264036178589, "learning_rate": 0.0002, "loss": 0.8743, "step": 90 }, { "epoch": 0.007800920508620017, "grad_norm": 0.756664514541626, "learning_rate": 0.0002, "loss": 0.8471, "step": 100 }, { "epoch": 0.007800920508620017, "eval_loss": 0.787709653377533, "eval_runtime": 160.5519, "eval_samples_per_second": 33.622, "eval_steps_per_second": 16.811, "step": 100 }, { "epoch": 0.008581012559482019, "grad_norm": 0.6181501746177673, "learning_rate": 0.0002, "loss": 0.8125, "step": 110 }, { "epoch": 0.00936110461034402, "grad_norm": 0.9252316355705261, "learning_rate": 0.0002, "loss": 0.8039, "step": 120 }, { "epoch": 0.010141196661206022, "grad_norm": 0.8269910216331482, "learning_rate": 0.0002, "loss": 0.8983, "step": 130 }, { "epoch": 0.010921288712068024, "grad_norm": 0.7751689553260803, "learning_rate": 0.0002, "loss": 0.7934, "step": 140 }, { "epoch": 0.011701380762930026, "grad_norm": 0.7328248620033264, "learning_rate": 0.0002, "loss": 0.7806, "step": 150 }, { "epoch": 0.011701380762930026, "eval_loss": 0.7735591530799866, "eval_runtime": 160.5021, "eval_samples_per_second": 33.632, "eval_steps_per_second": 16.816, "step": 150 }, { "epoch": 0.012481472813792027, "grad_norm": 0.7903790473937988, "learning_rate": 0.0002, "loss": 0.9333, "step": 160 }, { "epoch": 0.013261564864654029, "grad_norm": 0.736691951751709, "learning_rate": 0.0002, "loss": 0.7328, "step": 170 }, { "epoch": 0.01404165691551603, "grad_norm": 0.7038111090660095, "learning_rate": 0.0002, "loss": 0.7207, "step": 180 }, { "epoch": 0.014821748966378032, "grad_norm": 0.7238824367523193, "learning_rate": 0.0002, "loss": 0.7662, "step": 190 }, { "epoch": 0.015601841017240034, "grad_norm": 0.7471156120300293, "learning_rate": 0.0002, "loss": 0.7058, "step": 200 }, { "epoch": 0.015601841017240034, "eval_loss": 0.7626497149467468, "eval_runtime": 160.42, "eval_samples_per_second": 33.649, "eval_steps_per_second": 16.825, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.328602305200128e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }