Training in progress, step 280, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +91 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75a3e84096039afb527d22d691d180e109ca9921f708bdaa27632df4487a4260
 size 125048

 version https://git-lfs.github.com/spec/v1
+oid sha256:a61b56978b2452570a76d51bb39ac908f59f374a2960576a7c171f54e7ff4a4b
 size 125048

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc194bba403caad9cbc17f9f6c4159d35cde33e5cba286cd96d11edced40608d
 size 162868

 version https://git-lfs.github.com/spec/v1
+oid sha256:f05cd8f7fc6effc7eb3d50ca7c74768114cbda4c7a753cd908b737e12f3421cf
 size 162868

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3991cb24901cdd4ded826a1eb99233632b9b31143f5465b97735bc74e1caa25
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c42d8efe8b5044ea7806228bc26e27f1820a2789f8e060576ef9893082177ba2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:765f5571460aced30b253ddf135511867127c526d96f703a3f7058177ad62b46
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:322d234cf66e33210ecc86dafc19f666ddaa73050355080bdffd03ad06871557
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.01980209350586,
-  "best_model_checkpoint": "miner_id_24/checkpoint-270",
-  "epoch": 0.012203114053919686,
   "eval_steps": 5,
-  "global_step": 270,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2337,6 +2337,92 @@
       "eval_samples_per_second": 52.889,
       "eval_steps_per_second": 26.448,
       "step": 270
     }
   ],
   "logging_steps": 1,
@@ -2365,7 +2451,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2836817510400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.01966381072998,
+  "best_model_checkpoint": "miner_id_24/checkpoint-280",
+  "epoch": 0.012655081241101896,
   "eval_steps": 5,
+  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 52.889,
       "eval_steps_per_second": 26.448,
       "step": 270
+    },
+    {
+      "epoch": 0.012248310772637907,
+      "grad_norm": 0.636551022529602,
+      "learning_rate": 8.975972042185687e-05,
+      "loss": 44.1385,
+      "step": 271
+    },
+    {
+      "epoch": 0.012293507491356128,
+      "grad_norm": 0.5031408071517944,
+      "learning_rate": 8.912216437226693e-05,
+      "loss": 44.1121,
+      "step": 272
+    },
+    {
+      "epoch": 0.01233870421007435,
+      "grad_norm": 0.49243634939193726,
+      "learning_rate": 8.848505546789408e-05,
+      "loss": 44.0864,
+      "step": 273
+    },
+    {
+      "epoch": 0.01238390092879257,
+      "grad_norm": 0.47308340668678284,
+      "learning_rate": 8.784841989778996e-05,
+      "loss": 44.0391,
+      "step": 274
+    },
+    {
+      "epoch": 0.012429097647510792,
+      "grad_norm": 0.43966105580329895,
+      "learning_rate": 8.721228383154939e-05,
+      "loss": 44.0969,
+      "step": 275
+    },
+    {
+      "epoch": 0.012429097647510792,
+      "eval_loss": 11.019760131835938,
+      "eval_runtime": 176.1857,
+      "eval_samples_per_second": 52.882,
+      "eval_steps_per_second": 26.444,
+      "step": 275
+    },
+    {
+      "epoch": 0.012474294366229011,
+      "grad_norm": 0.4853382706642151,
+      "learning_rate": 8.657667341823448e-05,
+      "loss": 44.079,
+      "step": 276
+    },
+    {
+      "epoch": 0.012519491084947232,
+      "grad_norm": 0.453819215297699,
+      "learning_rate": 8.594161478529974e-05,
+      "loss": 44.0371,
+      "step": 277
+    },
+    {
+      "epoch": 0.012564687803665453,
+      "grad_norm": 0.4855421483516693,
+      "learning_rate": 8.530713403751821e-05,
+      "loss": 44.0514,
+      "step": 278
+    },
+    {
+      "epoch": 0.012609884522383675,
+      "grad_norm": 0.49890294671058655,
+      "learning_rate": 8.46732572559084e-05,
+      "loss": 44.0561,
+      "step": 279
+    },
+    {
+      "epoch": 0.012655081241101896,
+      "grad_norm": 0.406686007976532,
+      "learning_rate": 8.404001049666211e-05,
+      "loss": 44.0746,
+      "step": 280
+    },
+    {
+      "epoch": 0.012655081241101896,
+      "eval_loss": 11.01966381072998,
+      "eval_runtime": 176.4032,
+      "eval_samples_per_second": 52.817,
+      "eval_steps_per_second": 26.411,
+      "step": 280
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2941884825600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null