{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.05454700417625501, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0027273502088127504, "grad_norm": 15.259760856628418, "learning_rate": 1e-05, "loss": 2.9208, "step": 1 }, { "epoch": 0.0027273502088127504, "eval_loss": 2.921485662460327, "eval_runtime": 219.1663, "eval_samples_per_second": 11.27, "eval_steps_per_second": 2.82, "step": 1 }, { "epoch": 0.005454700417625501, "grad_norm": 15.232994079589844, "learning_rate": 2e-05, "loss": 2.8955, "step": 2 }, { "epoch": 0.008182050626438252, "grad_norm": 15.136183738708496, "learning_rate": 3e-05, "loss": 2.852, "step": 3 }, { "epoch": 0.010909400835251002, "grad_norm": 16.66099739074707, "learning_rate": 4e-05, "loss": 2.7422, "step": 4 }, { "epoch": 0.013636751044063752, "grad_norm": 18.048038482666016, "learning_rate": 5e-05, "loss": 2.2015, "step": 5 }, { "epoch": 0.013636751044063752, "eval_loss": 1.4312995672225952, "eval_runtime": 223.4476, "eval_samples_per_second": 11.054, "eval_steps_per_second": 2.766, "step": 5 }, { "epoch": 0.016364101252876503, "grad_norm": 14.389070510864258, "learning_rate": 6e-05, "loss": 1.421, "step": 6 }, { "epoch": 0.01909145146168925, "grad_norm": 8.234234809875488, "learning_rate": 7e-05, "loss": 0.6896, "step": 7 }, { "epoch": 0.021818801670502003, "grad_norm": 4.542180061340332, "learning_rate": 8e-05, "loss": 0.3594, "step": 8 }, { "epoch": 0.02454615187931475, "grad_norm": 3.838514804840088, "learning_rate": 9e-05, "loss": 0.2131, "step": 9 }, { "epoch": 0.027273502088127503, "grad_norm": 1.6445430517196655, "learning_rate": 0.0001, "loss": 0.1487, "step": 10 }, { "epoch": 0.027273502088127503, "eval_loss": 0.26161929965019226, "eval_runtime": 223.4522, "eval_samples_per_second": 11.054, "eval_steps_per_second": 2.766, "step": 10 }, { "epoch": 0.030000852296940255, "grad_norm": 5.722675800323486, "learning_rate": 9.755282581475769e-05, "loss": 0.2533, "step": 11 }, { "epoch": 0.03272820250575301, "grad_norm": 2.8063879013061523, "learning_rate": 9.045084971874738e-05, "loss": 0.1486, "step": 12 }, { "epoch": 0.035455552714565755, "grad_norm": 1.7891703844070435, "learning_rate": 7.938926261462366e-05, "loss": 0.1449, "step": 13 }, { "epoch": 0.0381829029233785, "grad_norm": 2.760188579559326, "learning_rate": 6.545084971874738e-05, "loss": 0.1632, "step": 14 }, { "epoch": 0.04091025313219126, "grad_norm": 0.9324430227279663, "learning_rate": 5e-05, "loss": 0.1375, "step": 15 }, { "epoch": 0.04091025313219126, "eval_loss": 0.21234437823295593, "eval_runtime": 219.4952, "eval_samples_per_second": 11.253, "eval_steps_per_second": 2.816, "step": 15 }, { "epoch": 0.04363760334100401, "grad_norm": 3.428504705429077, "learning_rate": 3.4549150281252636e-05, "loss": 0.197, "step": 16 }, { "epoch": 0.046364953549816755, "grad_norm": 2.601438283920288, "learning_rate": 2.061073738537635e-05, "loss": 0.1705, "step": 17 }, { "epoch": 0.0490923037586295, "grad_norm": 2.918519973754883, "learning_rate": 9.549150281252633e-06, "loss": 0.1809, "step": 18 }, { "epoch": 0.05181965396744226, "grad_norm": 1.0848146677017212, "learning_rate": 2.4471741852423237e-06, "loss": 0.1384, "step": 19 }, { "epoch": 0.05454700417625501, "grad_norm": 0.06631074100732803, "learning_rate": 0.0, "loss": 0.1314, "step": 20 }, { "epoch": 0.05454700417625501, "eval_loss": 0.13964733481407166, "eval_runtime": 219.3967, "eval_samples_per_second": 11.258, "eval_steps_per_second": 2.817, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.182135069160243e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }