{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6969365426695844, "eval_steps": 8, "global_step": 48, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0350109409190372, "eval_loss": 10.379219055175781, "eval_runtime": 0.7148, "eval_samples_per_second": 135.698, "eval_steps_per_second": 68.548, "step": 1 }, { "epoch": 0.1050328227571116, "grad_norm": 0.10373959690332413, "learning_rate": 3e-05, "loss": 10.3774, "step": 3 }, { "epoch": 0.2100656455142232, "grad_norm": 0.11316292732954025, "learning_rate": 6e-05, "loss": 10.3758, "step": 6 }, { "epoch": 0.2800875273522976, "eval_loss": 10.377415657043457, "eval_runtime": 0.7201, "eval_samples_per_second": 134.703, "eval_steps_per_second": 68.046, "step": 8 }, { "epoch": 0.3150984682713348, "grad_norm": 0.10614413022994995, "learning_rate": 9e-05, "loss": 10.3741, "step": 9 }, { "epoch": 0.4201312910284464, "grad_norm": 0.11337128281593323, "learning_rate": 0.00012, "loss": 10.3761, "step": 12 }, { "epoch": 0.5251641137855579, "grad_norm": 0.10311492532491684, "learning_rate": 0.00015000000000000001, "loss": 10.3712, "step": 15 }, { "epoch": 0.5601750547045952, "eval_loss": 10.371430397033691, "eval_runtime": 0.7202, "eval_samples_per_second": 134.683, "eval_steps_per_second": 68.036, "step": 16 }, { "epoch": 0.6301969365426696, "grad_norm": 0.12901803851127625, "learning_rate": 0.00018, "loss": 10.3699, "step": 18 }, { "epoch": 0.7352297592997812, "grad_norm": 0.13586793839931488, "learning_rate": 0.0001998867339183008, "loss": 10.366, "step": 21 }, { "epoch": 0.8402625820568927, "grad_norm": 0.18823161721229553, "learning_rate": 0.00019819286972627066, "loss": 10.3635, "step": 24 }, { "epoch": 0.8402625820568927, "eval_loss": 10.358380317687988, "eval_runtime": 0.7207, "eval_samples_per_second": 134.583, "eval_steps_per_second": 67.985, "step": 24 }, { "epoch": 0.9452954048140044, "grad_norm": 0.15795904397964478, "learning_rate": 0.00019450008187146684, "loss": 10.3562, "step": 27 }, { "epoch": 1.0667396061269148, "grad_norm": 0.22537720203399658, "learning_rate": 0.00018888354486549237, "loss": 13.5544, "step": 30 }, { "epoch": 1.136761487964989, "eval_loss": 10.337327003479004, "eval_runtime": 0.7182, "eval_samples_per_second": 135.057, "eval_steps_per_second": 68.225, "step": 32 }, { "epoch": 1.1717724288840263, "grad_norm": 0.27254220843315125, "learning_rate": 0.00018145759520503358, "loss": 10.2022, "step": 33 }, { "epoch": 1.276805251641138, "grad_norm": 0.2869032025337219, "learning_rate": 0.00017237340381050703, "loss": 10.3997, "step": 36 }, { "epoch": 1.3818380743982495, "grad_norm": 0.2603079676628113, "learning_rate": 0.00016181589862206052, "loss": 10.2418, "step": 39 }, { "epoch": 1.4168490153172866, "eval_loss": 10.31363582611084, "eval_runtime": 0.7196, "eval_samples_per_second": 134.788, "eval_steps_per_second": 68.089, "step": 40 }, { "epoch": 1.486870897155361, "grad_norm": 0.1934972107410431, "learning_rate": 0.00015000000000000001, "loss": 10.3704, "step": 42 }, { "epoch": 1.5919037199124726, "grad_norm": 0.19271045923233032, "learning_rate": 0.00013716624556603274, "loss": 10.3807, "step": 45 }, { "epoch": 1.6969365426695844, "grad_norm": 0.14647410809993744, "learning_rate": 0.00012357589355094275, "loss": 10.2948, "step": 48 }, { "epoch": 1.6969365426695844, "eval_loss": 10.299408912658691, "eval_runtime": 0.7215, "eval_samples_per_second": 134.448, "eval_steps_per_second": 67.917, "step": 48 } ], "logging_steps": 3, "max_steps": 86, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 8, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 20595493699584.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }