Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4278a6793f478e3afab8865caccbb20b2b0419791b3fd8c21c475b3ab2d23191
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:86108a569f2c6a27e5992969edbb9a046fd0653a21c756b76c9b9dfdc2469941
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec9b473bb78b3e23b7d80e81fc3d3a9f2ab7e4bb1b3d3e7301674cd1f365bdc4
 size 51613668

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e343da8721f201900fa24bb1e57f36ae75bacc30aea7af566fbb3657e196968
 size 51613668

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6b701c95189e92437a94e3e9998838034b63a2012928b438aa7581de8f020d8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:05100aa6d7a184b9ee32fdd6461077ba32634042ed9f6e25aded76cc38687f88
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6619084477424622,
-  "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.06502554575011611,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 42.782,
       "eval_steps_per_second": 10.698,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2544,12 +2902,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.37583474041815e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6609265804290771,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.07431490942870414,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 42.782,
       "eval_steps_per_second": 10.698,
       "step": 350
+    },
+    {
+      "epoch": 0.06521133302368788,
+      "grad_norm": 0.5838446617126465,
+      "learning_rate": 3.844650207332562e-06,
+      "loss": 0.6433,
+      "step": 351
+    },
+    {
+      "epoch": 0.06539712029725964,
+      "grad_norm": 0.7719784379005432,
+      "learning_rate": 3.691267552111183e-06,
+      "loss": 0.8205,
+      "step": 352
+    },
+    {
+      "epoch": 0.06558290757083139,
+      "grad_norm": 0.8239154815673828,
+      "learning_rate": 3.54088980417534e-06,
+      "loss": 0.8143,
+      "step": 353
+    },
+    {
+      "epoch": 0.06576869484440316,
+      "grad_norm": 0.8158086538314819,
+      "learning_rate": 3.393526721321616e-06,
+      "loss": 0.756,
+      "step": 354
+    },
+    {
+      "epoch": 0.06595448211797492,
+      "grad_norm": 0.8154739737510681,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 0.7736,
+      "step": 355
+    },
+    {
+      "epoch": 0.06614026939154669,
+      "grad_norm": 0.8578819632530212,
+      "learning_rate": 3.1078826033397843e-06,
+      "loss": 0.6857,
+      "step": 356
+    },
+    {
+      "epoch": 0.06632605666511844,
+      "grad_norm": 0.7902318239212036,
+      "learning_rate": 2.9696201032491434e-06,
+      "loss": 0.786,
+      "step": 357
+    },
+    {
+      "epoch": 0.0665118439386902,
+      "grad_norm": 0.7309139966964722,
+      "learning_rate": 2.8344093371128424e-06,
+      "loss": 0.6343,
+      "step": 358
+    },
+    {
+      "epoch": 0.06669763121226197,
+      "grad_norm": 0.6805800199508667,
+      "learning_rate": 2.70225907856374e-06,
+      "loss": 0.564,
+      "step": 359
+    },
+    {
+      "epoch": 0.06688341848583372,
+      "grad_norm": 0.6480996012687683,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 0.6257,
+      "step": 360
+    },
+    {
+      "epoch": 0.06706920575940548,
+      "grad_norm": 0.5820838809013367,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.5526,
+      "step": 361
+    },
+    {
+      "epoch": 0.06725499303297724,
+      "grad_norm": 0.7131157517433167,
+      "learning_rate": 2.324256102563188e-06,
+      "loss": 0.6689,
+      "step": 362
+    },
+    {
+      "epoch": 0.067440780306549,
+      "grad_norm": 0.6618325710296631,
+      "learning_rate": 2.204431630583548e-06,
+      "loss": 0.5775,
+      "step": 363
+    },
+    {
+      "epoch": 0.06762656758012076,
+      "grad_norm": 0.6456477046012878,
+      "learning_rate": 2.087708544541689e-06,
+      "loss": 0.554,
+      "step": 364
+    },
+    {
+      "epoch": 0.06781235485369252,
+      "grad_norm": 0.6535434126853943,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 0.6671,
+      "step": 365
+    },
+    {
+      "epoch": 0.06799814212726428,
+      "grad_norm": 0.7245746850967407,
+      "learning_rate": 1.8635966245104664e-06,
+      "loss": 0.6361,
+      "step": 366
+    },
+    {
+      "epoch": 0.06818392940083605,
+      "grad_norm": 0.573853075504303,
+      "learning_rate": 1.7562223328224325e-06,
+      "loss": 0.5036,
+      "step": 367
+    },
+    {
+      "epoch": 0.0683697166744078,
+      "grad_norm": 0.6435849070549011,
+      "learning_rate": 1.6519785107311891e-06,
+      "loss": 0.5991,
+      "step": 368
+    },
+    {
+      "epoch": 0.06855550394797956,
+      "grad_norm": 0.6336526870727539,
+      "learning_rate": 1.5508719224689717e-06,
+      "loss": 0.5897,
+      "step": 369
+    },
+    {
+      "epoch": 0.06874129122155133,
+      "grad_norm": 0.5985144376754761,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 0.5156,
+      "step": 370
+    },
+    {
+      "epoch": 0.06892707849512308,
+      "grad_norm": 0.6162166595458984,
+      "learning_rate": 1.358096486081778e-06,
+      "loss": 0.624,
+      "step": 371
+    },
+    {
+      "epoch": 0.06911286576869484,
+      "grad_norm": 0.5633879899978638,
+      "learning_rate": 1.2664401468786114e-06,
+      "loss": 0.548,
+      "step": 372
+    },
+    {
+      "epoch": 0.06929865304226661,
+      "grad_norm": 0.7402003407478333,
+      "learning_rate": 1.1779460585363944e-06,
+      "loss": 0.6961,
+      "step": 373
+    },
+    {
+      "epoch": 0.06948444031583836,
+      "grad_norm": 0.6589695811271667,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.6001,
+      "step": 374
+    },
+    {
+      "epoch": 0.06967022758941012,
+      "grad_norm": 0.5838775634765625,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 0.4839,
+      "step": 375
+    },
+    {
+      "epoch": 0.06985601486298189,
+      "grad_norm": 0.6574169397354126,
+      "learning_rate": 9.314936930293283e-07,
+      "loss": 0.6251,
+      "step": 376
+    },
+    {
+      "epoch": 0.07004180213655364,
+      "grad_norm": 0.6504446268081665,
+      "learning_rate": 8.557039732283944e-07,
+      "loss": 0.6724,
+      "step": 377
+    },
+    {
+      "epoch": 0.07022758941012541,
+      "grad_norm": 0.7052487134933472,
+      "learning_rate": 7.83103156370113e-07,
+      "loss": 0.6968,
+      "step": 378
+    },
+    {
+      "epoch": 0.07041337668369717,
+      "grad_norm": 0.667565643787384,
+      "learning_rate": 7.136959534174592e-07,
+      "loss": 0.688,
+      "step": 379
+    },
+    {
+      "epoch": 0.07059916395726892,
+      "grad_norm": 0.663486897945404,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 0.612,
+      "step": 380
+    },
+    {
+      "epoch": 0.07078495123084069,
+      "grad_norm": 0.7024902105331421,
+      "learning_rate": 5.844801966434832e-07,
+      "loss": 0.6131,
+      "step": 381
+    },
+    {
+      "epoch": 0.07097073850441245,
+      "grad_norm": 0.5356816053390503,
+      "learning_rate": 5.246800274474439e-07,
+      "loss": 0.5105,
+      "step": 382
+    },
+    {
+      "epoch": 0.0711565257779842,
+      "grad_norm": 0.6758443713188171,
+      "learning_rate": 4.680902408635335e-07,
+      "loss": 0.564,
+      "step": 383
+    },
+    {
+      "epoch": 0.07134231305155597,
+      "grad_norm": 0.658290445804596,
+      "learning_rate": 4.1471450892189846e-07,
+      "loss": 0.5886,
+      "step": 384
+    },
+    {
+      "epoch": 0.07152810032512773,
+      "grad_norm": 0.7062132954597473,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 0.6998,
+      "step": 385
+    },
+    {
+      "epoch": 0.07171388759869948,
+      "grad_norm": 0.6337407231330872,
+      "learning_rate": 3.1761885408435054e-07,
+      "loss": 0.6461,
+      "step": 386
+    },
+    {
+      "epoch": 0.07189967487227125,
+      "grad_norm": 0.6352119445800781,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.585,
+      "step": 387
+    },
+    {
+      "epoch": 0.07208546214584301,
+      "grad_norm": 0.6841195225715637,
+      "learning_rate": 2.334182641175686e-07,
+      "loss": 0.6587,
+      "step": 388
+    },
+    {
+      "epoch": 0.07227124941941478,
+      "grad_norm": 0.7172592282295227,
+      "learning_rate": 1.9616057881935436e-07,
+      "loss": 0.6138,
+      "step": 389
+    },
+    {
+      "epoch": 0.07245703669298653,
+      "grad_norm": 0.7642425298690796,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 0.7243,
+      "step": 390
+    },
+    {
+      "epoch": 0.07264282396655829,
+      "grad_norm": 0.752518355846405,
+      "learning_rate": 1.3134251542544774e-07,
+      "loss": 0.7156,
+      "step": 391
+    },
+    {
+      "epoch": 0.07282861124013006,
+      "grad_norm": 0.7578738331794739,
+      "learning_rate": 1.0378634328099269e-07,
+      "loss": 0.7513,
+      "step": 392
+    },
+    {
+      "epoch": 0.07301439851370181,
+      "grad_norm": 0.7395659685134888,
+      "learning_rate": 7.946786493666647e-08,
+      "loss": 0.7112,
+      "step": 393
+    },
+    {
+      "epoch": 0.07320018578727357,
+      "grad_norm": 0.7067527174949646,
+      "learning_rate": 5.838865838366792e-08,
+      "loss": 0.7645,
+      "step": 394
+    },
+    {
+      "epoch": 0.07338597306084534,
+      "grad_norm": 0.7345956563949585,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 0.8043,
+      "step": 395
+    },
+    {
+      "epoch": 0.07357176033441709,
+      "grad_norm": 0.7250311374664307,
+      "learning_rate": 2.595332156925534e-08,
+      "loss": 0.7098,
+      "step": 396
+    },
+    {
+      "epoch": 0.07375754760798885,
+      "grad_norm": 0.7960325479507446,
+      "learning_rate": 1.4599295990352924e-08,
+      "loss": 0.6743,
+      "step": 397
+    },
+    {
+      "epoch": 0.07394333488156062,
+      "grad_norm": 0.6739823222160339,
+      "learning_rate": 6.488751431266149e-09,
+      "loss": 0.6511,
+      "step": 398
+    },
+    {
+      "epoch": 0.07412912215513237,
+      "grad_norm": 0.8577350974082947,
+      "learning_rate": 1.622214173602199e-09,
+      "loss": 0.7901,
+      "step": 399
+    },
+    {
+      "epoch": 0.07431490942870414,
+      "grad_norm": 0.751978874206543,
+      "learning_rate": 0.0,
+      "loss": 0.6483,
+      "step": 400
+    },
+    {
+      "epoch": 0.07431490942870414,
+      "eval_loss": 0.6609265804290771,
+      "eval_runtime": 211.7075,
+      "eval_samples_per_second": 42.823,
+      "eval_steps_per_second": 10.708,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9.563453099134157e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null