{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.010260970687827069,
  "eval_steps": 500,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006840647125218045,
      "grad_norm": 0.6777583360671997,
      "learning_rate": 0.0001931034482758621,
      "loss": 2.5631,
      "step": 10
    },
    {
      "epoch": 0.001368129425043609,
      "grad_norm": 0.529003918170929,
      "learning_rate": 0.0001793103448275862,
      "loss": 2.2202,
      "step": 20
    },
    {
      "epoch": 0.0020521941375654137,
      "grad_norm": 0.6383516192436218,
      "learning_rate": 0.00016551724137931035,
      "loss": 2.2541,
      "step": 30
    },
    {
      "epoch": 0.002736258850087218,
      "grad_norm": 0.547764778137207,
      "learning_rate": 0.00015172413793103449,
      "loss": 2.1831,
      "step": 40
    },
    {
      "epoch": 0.003420323562609023,
      "grad_norm": null,
      "learning_rate": 0.0001393103448275862,
      "loss": 2.2025,
      "step": 50
    },
    {
      "epoch": 0.0041043882751308275,
      "grad_norm": 0.6107162237167358,
      "learning_rate": 0.00012551724137931035,
      "loss": 2.1387,
      "step": 60
    },
    {
      "epoch": 0.004788452987652632,
      "grad_norm": 0.5852159261703491,
      "learning_rate": 0.00011172413793103449,
      "loss": 2.1362,
      "step": 70
    },
    {
      "epoch": 0.005472517700174436,
      "grad_norm": 0.587351381778717,
      "learning_rate": 9.793103448275862e-05,
      "loss": 2.1783,
      "step": 80
    },
    {
      "epoch": 0.006156582412696241,
      "grad_norm": 0.4379996657371521,
      "learning_rate": 8.413793103448277e-05,
      "loss": 2.0619,
      "step": 90
    },
    {
      "epoch": 0.006840647125218046,
      "grad_norm": 0.6348068714141846,
      "learning_rate": 7.03448275862069e-05,
      "loss": 2.1559,
      "step": 100
    },
    {
      "epoch": 0.0075247118377398505,
      "grad_norm": 0.5908586382865906,
      "learning_rate": 5.6551724137931037e-05,
      "loss": 2.0957,
      "step": 110
    },
    {
      "epoch": 0.008208776550261655,
      "grad_norm": 0.6663537621498108,
      "learning_rate": 4.275862068965518e-05,
      "loss": 2.2033,
      "step": 120
    },
    {
      "epoch": 0.008892841262783458,
      "grad_norm": 0.5192745923995972,
      "learning_rate": 2.8965517241379313e-05,
      "loss": 2.0376,
      "step": 130
    },
    {
      "epoch": 0.009576905975305264,
      "grad_norm": 0.6954190731048584,
      "learning_rate": 1.5172413793103448e-05,
      "loss": 2.108,
      "step": 140
    },
    {
      "epoch": 0.010260970687827069,
      "grad_norm": 0.6196115016937256,
      "learning_rate": 1.3793103448275862e-06,
      "loss": 1.9693,
      "step": 150
    }
  ],
  "logging_steps": 10,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.164445885681664e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}