Training in progress, step 212, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +88 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3bf6d6a2180aaa44b0684de1c1b4c44c876e3b61e90774ecf34f9d75d0e5306
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f3ae69127fc293d24c01e08524b58c279e8c5f0ae7b6c7a1ef22f6be3e77291
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:753552200a46d3e7e4d375873d1c04cbc4b9acbcf9d4abe320d73e382fdda544
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdcf04cb6bf4c0b0d7b9f8f1eb99e7b5124bbb4f4b8805d7916763277c22a05b
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7df43c4526bd9a8ecddc4847c60b296a89597a43586e7d1e215a1b5c2cbd258
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:27cc510000b450ada0ab6c2ce09449d46c05dfdfaf1135374ca981c94e122f97
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f462ed10ef931ef3980d2aa020052a49dcadd400550d826d7ccc8a4e298b95f9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b80340678794eceb27a8ea7f91bc6361013d570f26cb607a0c954f150a1a4c00
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.1042157411575317,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.9456264775413712,
   "eval_steps": 50,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1447,6 +1447,90 @@
       "eval_samples_per_second": 9.275,
       "eval_steps_per_second": 2.338,
       "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1470,12 +1554,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.6742965062651085e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.1042157411575317,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.0023640661938533,
   "eval_steps": 50,
+  "global_step": 212,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.275,
       "eval_steps_per_second": 2.338,
       "step": 200
+    },
+    {
+      "epoch": 0.950354609929078,
+      "grad_norm": 0.3739728331565857,
+      "learning_rate": 7.29899235565934e-07,
+      "loss": 1.1624,
+      "step": 201
+    },
+    {
+      "epoch": 0.9550827423167849,
+      "grad_norm": 0.35657617449760437,
+      "learning_rate": 6.034780129621664e-07,
+      "loss": 1.0881,
+      "step": 202
+    },
+    {
+      "epoch": 0.9598108747044918,
+      "grad_norm": 0.37702855467796326,
+      "learning_rate": 4.890044990165321e-07,
+      "loss": 1.1991,
+      "step": 203
+    },
+    {
+      "epoch": 0.9645390070921985,
+      "grad_norm": 0.3940827250480652,
+      "learning_rate": 3.8650638183617694e-07,
+      "loss": 1.1217,
+      "step": 204
+    },
+    {
+      "epoch": 0.9692671394799054,
+      "grad_norm": 0.3676983118057251,
+      "learning_rate": 2.9600845299737056e-07,
+      "loss": 1.0818,
+      "step": 205
+    },
+    {
+      "epoch": 0.9739952718676123,
+      "grad_norm": 0.399383008480072,
+      "learning_rate": 2.1753260154906973e-07,
+      "loss": 1.0898,
+      "step": 206
+    },
+    {
+      "epoch": 0.9787234042553191,
+      "grad_norm": 0.3540259599685669,
+      "learning_rate": 1.5109780871853663e-07,
+      "loss": 1.0337,
+      "step": 207
+    },
+    {
+      "epoch": 0.983451536643026,
+      "grad_norm": 0.3893062174320221,
+      "learning_rate": 9.672014332028356e-08,
+      "loss": 0.9161,
+      "step": 208
+    },
+    {
+      "epoch": 0.9881796690307328,
+      "grad_norm": 0.39612168073654175,
+      "learning_rate": 5.4412757869459763e-08,
+      "loss": 1.0683,
+      "step": 209
+    },
+    {
+      "epoch": 0.9929078014184397,
+      "grad_norm": 0.443149209022522,
+      "learning_rate": 2.4185885400596075e-08,
+      "loss": 1.0856,
+      "step": 210
+    },
+    {
+      "epoch": 0.9976359338061466,
+      "grad_norm": 0.46631062030792236,
+      "learning_rate": 6.04683699252373e-09,
+      "loss": 1.101,
+      "step": 211
+    },
+    {
+      "epoch": 1.0023640661938533,
+      "grad_norm": 0.9580826759338379,
+      "learning_rate": 0.0,
+      "loss": 1.865,
+      "step": 212
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.8356191188942848e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null