{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.16096579476861167, "eval_steps": 500, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008048289738430584, "grad_norm": Infinity, "learning_rate": 0.0001, "loss": 2.3834, "step": 1 }, { "epoch": 0.01609657947686117, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 2.3102, "step": 2 }, { "epoch": 0.02414486921529175, "grad_norm": Infinity, "learning_rate": 0.000199658449300667, "loss": 2.3726, "step": 3 }, { "epoch": 0.03219315895372234, "grad_norm": Infinity, "learning_rate": 0.00019863613034027224, "loss": 2.1816, "step": 4 }, { "epoch": 0.04024144869215292, "grad_norm": Infinity, "learning_rate": 0.00019694002659393305, "loss": 2.4038, "step": 5 }, { "epoch": 0.0482897384305835, "grad_norm": Infinity, "learning_rate": 0.00019458172417006347, "loss": 2.4681, "step": 6 }, { "epoch": 0.056338028169014086, "grad_norm": Infinity, "learning_rate": 0.00019157733266550575, "loss": 2.3357, "step": 7 }, { "epoch": 0.06438631790744467, "grad_norm": Infinity, "learning_rate": 0.0001879473751206489, "loss": 2.3554, "step": 8 }, { "epoch": 0.07243460764587525, "grad_norm": Infinity, "learning_rate": 0.00018371664782625287, "loss": 2.2722, "step": 9 }, { "epoch": 0.08048289738430583, "grad_norm": Infinity, "learning_rate": 0.00017891405093963938, "loss": 2.3479, "step": 10 }, { "epoch": 0.08853118712273642, "grad_norm": Infinity, "learning_rate": 0.00017357239106731317, "loss": 2.4537, "step": 11 }, { "epoch": 0.096579476861167, "grad_norm": Infinity, "learning_rate": 0.00016772815716257412, "loss": 2.4485, "step": 12 }, { "epoch": 0.10462776659959759, "grad_norm": Infinity, "learning_rate": 0.0001614212712689668, "loss": 2.4963, "step": 13 }, { "epoch": 0.11267605633802817, "grad_norm": Infinity, "learning_rate": 0.00015469481581224272, "loss": 2.3565, "step": 14 }, { "epoch": 0.12072434607645875, "grad_norm": Infinity, "learning_rate": 0.00014759473930370736, "loss": 2.38, "step": 15 }, { "epoch": 0.12877263581488935, "grad_norm": Infinity, "learning_rate": 0.00014016954246529696, "loss": 2.4191, "step": 16 }, { "epoch": 0.13682092555331993, "grad_norm": Infinity, "learning_rate": 0.00013246994692046836, "loss": 2.4504, "step": 17 }, { "epoch": 0.1448692152917505, "grad_norm": Infinity, "learning_rate": 0.00012454854871407994, "loss": 2.4143, "step": 18 }, { "epoch": 0.1529175050301811, "grad_norm": Infinity, "learning_rate": 0.00011645945902807341, "loss": 2.3919, "step": 19 }, { "epoch": 0.16096579476861167, "grad_norm": Infinity, "learning_rate": 0.00010825793454723325, "loss": 2.3508, "step": 20 } ], "logging_steps": 1, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.666832784162816e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }