{
  "best_metric": 2.580864906311035,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.05,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001,
      "grad_norm": 0.2811749279499054,
      "learning_rate": 2e-05,
      "loss": 2.6895,
      "step": 1
    },
    {
      "epoch": 0.001,
      "eval_loss": 3.0125324726104736,
      "eval_runtime": 4.6936,
      "eval_samples_per_second": 4.474,
      "eval_steps_per_second": 4.474,
      "step": 1
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.2987586557865143,
      "learning_rate": 4e-05,
      "loss": 2.8912,
      "step": 2
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.452608197927475,
      "learning_rate": 6e-05,
      "loss": 3.4357,
      "step": 3
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.38785919547080994,
      "learning_rate": 8e-05,
      "loss": 2.5889,
      "step": 4
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.38931822776794434,
      "learning_rate": 0.0001,
      "loss": 2.7513,
      "step": 5
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.516417384147644,
      "learning_rate": 0.00012,
      "loss": 3.2128,
      "step": 6
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.4206741750240326,
      "learning_rate": 0.00014,
      "loss": 2.9368,
      "step": 7
    },
    {
      "epoch": 0.008,
      "grad_norm": 0.48171964287757874,
      "learning_rate": 0.00016,
      "loss": 2.8618,
      "step": 8
    },
    {
      "epoch": 0.009,
      "grad_norm": 0.8544142842292786,
      "learning_rate": 0.00018,
      "loss": 3.0312,
      "step": 9
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.848558247089386,
      "learning_rate": 0.0002,
      "loss": 2.9334,
      "step": 10
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.8914313316345215,
      "learning_rate": 0.00019999996900269505,
      "loss": 2.7981,
      "step": 11
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.6103464365005493,
      "learning_rate": 0.0001999998760107994,
      "loss": 2.7247,
      "step": 12
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.7618600726127625,
      "learning_rate": 0.00019999972102437074,
      "loss": 2.472,
      "step": 13
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.6825264692306519,
      "learning_rate": 0.00019999950404350512,
      "loss": 2.6008,
      "step": 14
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.5940832495689392,
      "learning_rate": 0.00019999922506833704,
      "loss": 2.1996,
      "step": 15
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.6273623108863831,
      "learning_rate": 0.00019999888409903948,
      "loss": 2.3565,
      "step": 16
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.7437952160835266,
      "learning_rate": 0.00019999848113582384,
      "loss": 2.7232,
      "step": 17
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.5971533060073853,
      "learning_rate": 0.0001999980161789399,
      "loss": 2.509,
      "step": 18
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.5190719962120056,
      "learning_rate": 0.00019999748922867592,
      "loss": 2.3535,
      "step": 19
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9244285821914673,
      "learning_rate": 0.00019999690028535855,
      "loss": 2.7599,
      "step": 20
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.8340674638748169,
      "learning_rate": 0.00019999624934935296,
      "loss": 3.0057,
      "step": 21
    },
    {
      "epoch": 0.022,
      "grad_norm": 1.0633089542388916,
      "learning_rate": 0.00019999553642106266,
      "loss": 2.2808,
      "step": 22
    },
    {
      "epoch": 0.023,
      "grad_norm": 4.8767266273498535,
      "learning_rate": 0.00019999476150092967,
      "loss": 2.8268,
      "step": 23
    },
    {
      "epoch": 0.024,
      "grad_norm": 2.7197344303131104,
      "learning_rate": 0.00019999392458943432,
      "loss": 2.6517,
      "step": 24
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.9329593777656555,
      "learning_rate": 0.00019999302568709547,
      "loss": 2.212,
      "step": 25
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.6679103374481201,
      "learning_rate": 0.00019999206479447045,
      "loss": 2.0117,
      "step": 26
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.5428286790847778,
      "learning_rate": 0.00019999104191215493,
      "loss": 2.7582,
      "step": 27
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.5552177429199219,
      "learning_rate": 0.00019998995704078305,
      "loss": 2.54,
      "step": 28
    },
    {
      "epoch": 0.029,
      "grad_norm": 0.5453671216964722,
      "learning_rate": 0.00019998881018102737,
      "loss": 2.5358,
      "step": 29
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.47653189301490784,
      "learning_rate": 0.00019998760133359885,
      "loss": 2.2443,
      "step": 30
    },
    {
      "epoch": 0.031,
      "grad_norm": 0.755976140499115,
      "learning_rate": 0.0001999863304992469,
      "loss": 2.5519,
      "step": 31
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.7680912017822266,
      "learning_rate": 0.00019998499767875943,
      "loss": 2.7503,
      "step": 32
    },
    {
      "epoch": 0.033,
      "grad_norm": 3.768080472946167,
      "learning_rate": 0.0001999836028729627,
      "loss": 2.6051,
      "step": 33
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.5304062962532043,
      "learning_rate": 0.00019998214608272136,
      "loss": 2.2065,
      "step": 34
    },
    {
      "epoch": 0.035,
      "grad_norm": 1.1568998098373413,
      "learning_rate": 0.00019998062730893862,
      "loss": 2.444,
      "step": 35
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.8356309533119202,
      "learning_rate": 0.000199979046552556,
      "loss": 2.5763,
      "step": 36
    },
    {
      "epoch": 0.037,
      "grad_norm": 0.5210471749305725,
      "learning_rate": 0.00019997740381455346,
      "loss": 2.8545,
      "step": 37
    },
    {
      "epoch": 0.038,
      "grad_norm": 1.550714373588562,
      "learning_rate": 0.00019997569909594947,
      "loss": 2.6236,
      "step": 38
    },
    {
      "epoch": 0.039,
      "grad_norm": 0.6044741868972778,
      "learning_rate": 0.0001999739323978008,
      "loss": 2.5349,
      "step": 39
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9703565239906311,
      "learning_rate": 0.00019997210372120274,
      "loss": 3.1004,
      "step": 40
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.7796650528907776,
      "learning_rate": 0.000199970213067289,
      "loss": 2.5757,
      "step": 41
    },
    {
      "epoch": 0.042,
      "grad_norm": 0.6824871301651001,
      "learning_rate": 0.00019996826043723162,
      "loss": 2.6766,
      "step": 42
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.8048773407936096,
      "learning_rate": 0.00019996624583224114,
      "loss": 2.3065,
      "step": 43
    },
    {
      "epoch": 0.044,
      "grad_norm": 0.5458154082298279,
      "learning_rate": 0.00019996416925356652,
      "loss": 2.4336,
      "step": 44
    },
    {
      "epoch": 0.045,
      "grad_norm": 0.623190701007843,
      "learning_rate": 0.00019996203070249516,
      "loss": 2.3835,
      "step": 45
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.5928781032562256,
      "learning_rate": 0.00019995983018035278,
      "loss": 2.3408,
      "step": 46
    },
    {
      "epoch": 0.047,
      "grad_norm": 0.5790976881980896,
      "learning_rate": 0.00019995756768850364,
      "loss": 2.3878,
      "step": 47
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.5648425817489624,
      "learning_rate": 0.00019995524322835034,
      "loss": 2.2885,
      "step": 48
    },
    {
      "epoch": 0.049,
      "grad_norm": 0.526339054107666,
      "learning_rate": 0.00019995285680133394,
      "loss": 2.408,
      "step": 49
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6333803534507751,
      "learning_rate": 0.00019995040840893388,
      "loss": 2.4391,
      "step": 50
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.580864906311035,
      "eval_runtime": 4.8038,
      "eval_samples_per_second": 4.372,
      "eval_steps_per_second": 4.372,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4108715871436800.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}