Training in progress, step 1800, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +118 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ab9a5df5e1c8060d8373b14a51b4f8888fd97811538de37ef80f3d16fb51073
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:72d46c8fe0234ee75f91ec5fa90676188c422627c6ce38b838cd66c10a17fe93
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:217094565747c2b9ed586bc5c7228df1f82c6b477100624d9c248dbb5b158445
 size 85723732

 version https://git-lfs.github.com/spec/v1
+oid sha256:87b644d70458c120b462aad650a3b75789fda13a3765010c9382f03c1857faf3
 size 85723732

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a748b8dddefa8c3474c694e234ef459c2d59739da98f7097037d44c1f8667fb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa6c4fb787dda3b5af96393eea29fe45a41c6f0fda1167422e0eed8251ea6c4d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26b811a0ecde9d77841cf06335777c47657d93b172b0c899d014ac2d55b60923
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:110b2f0ce7be25c09ed998ed1965f38a8d3a448ca6aa07e3d4392461b80d705f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7165031433105469,
   "best_model_checkpoint": "miner_id_24/checkpoint-1500",
-  "epoch": 0.1881145788798632,
   "eval_steps": 150,
-  "global_step": 1650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1258,6 +1258,119 @@
       "eval_samples_per_second": 24.851,
       "eval_steps_per_second": 6.214,
       "step": 1650
     }
   ],
   "logging_steps": 10,
@@ -1272,7 +1385,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -1281,12 +1394,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.800825007898624e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7165031433105469,
   "best_model_checkpoint": "miner_id_24/checkpoint-1500",
+  "epoch": 0.20521590423257802,
   "eval_steps": 150,
+  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 24.851,
       "eval_steps_per_second": 6.214,
       "step": 1650
+    },
+    {
+      "epoch": 0.18925466723671083,
+      "grad_norm": 20.21547508239746,
+      "learning_rate": 0.0001,
+      "loss": 3.5619,
+      "step": 1660
+    },
+    {
+      "epoch": 0.1903947555935585,
+      "grad_norm": 13.824969291687012,
+      "learning_rate": 0.0001,
+      "loss": 2.724,
+      "step": 1670
+    },
+    {
+      "epoch": 0.19153484395040615,
+      "grad_norm": 19.375152587890625,
+      "learning_rate": 0.0001,
+      "loss": 2.62,
+      "step": 1680
+    },
+    {
+      "epoch": 0.1926749323072538,
+      "grad_norm": 16.543657302856445,
+      "learning_rate": 0.0001,
+      "loss": 2.7389,
+      "step": 1690
+    },
+    {
+      "epoch": 0.19381502066410147,
+      "grad_norm": 42.59777069091797,
+      "learning_rate": 0.0001,
+      "loss": 3.038,
+      "step": 1700
+    },
+    {
+      "epoch": 0.19495510902094912,
+      "grad_norm": 18.97529411315918,
+      "learning_rate": 0.0001,
+      "loss": 3.3123,
+      "step": 1710
+    },
+    {
+      "epoch": 0.1960951973777968,
+      "grad_norm": 16.993947982788086,
+      "learning_rate": 0.0001,
+      "loss": 2.5807,
+      "step": 1720
+    },
+    {
+      "epoch": 0.19723528573464444,
+      "grad_norm": 16.720861434936523,
+      "learning_rate": 0.0001,
+      "loss": 2.711,
+      "step": 1730
+    },
+    {
+      "epoch": 0.19837537409149208,
+      "grad_norm": 17.76796531677246,
+      "learning_rate": 0.0001,
+      "loss": 2.4549,
+      "step": 1740
+    },
+    {
+      "epoch": 0.19951546244833976,
+      "grad_norm": 26.065580368041992,
+      "learning_rate": 0.0001,
+      "loss": 3.1208,
+      "step": 1750
+    },
+    {
+      "epoch": 0.2006555508051874,
+      "grad_norm": 24.055755615234375,
+      "learning_rate": 0.0001,
+      "loss": 3.5162,
+      "step": 1760
+    },
+    {
+      "epoch": 0.20179563916203505,
+      "grad_norm": 16.346284866333008,
+      "learning_rate": 0.0001,
+      "loss": 2.7431,
+      "step": 1770
+    },
+    {
+      "epoch": 0.20293572751888272,
+      "grad_norm": 14.49986457824707,
+      "learning_rate": 0.0001,
+      "loss": 2.6966,
+      "step": 1780
+    },
+    {
+      "epoch": 0.20407581587573037,
+      "grad_norm": 15.518335342407227,
+      "learning_rate": 0.0001,
+      "loss": 2.5071,
+      "step": 1790
+    },
+    {
+      "epoch": 0.20521590423257802,
+      "grad_norm": 22.37300682067871,
+      "learning_rate": 0.0001,
+      "loss": 3.201,
+      "step": 1800
+    },
+    {
+      "epoch": 0.20521590423257802,
+      "eval_loss": 0.7226201295852661,
+      "eval_runtime": 297.2798,
+      "eval_samples_per_second": 24.849,
+      "eval_steps_per_second": 6.213,
+      "step": 1800
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.328172735889408e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null