Training in progress, step 450, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55bde86d5915de081772b34a16be17ff2e5d08dabaf800c1dbf95f4536eade34
 size 83115256

 version https://git-lfs.github.com/spec/v1
+oid sha256:8cfca774e0e4b15eb3d5db468fb58dba8558863d5dd85202fa6b18e632f5362b
 size 83115256

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3690f69df951d8b4f534212098ba547282f4b2f488b4f03456e936d1f35bb58
 size 42608772

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3ec5e7c7a66f68d52640ed1ec2601d9ee9952d803f4403489599063c3e13fdf
 size 42608772

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcc584fdffa765c7b7d097d7463383fb67e4efb3117fc522e636ada01b5d5461
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b3f655ff85d03725f60fb2deecc05d59c493267c3f8f6447c5180c7397f52dc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:303e83c678e93581d63a3650d04963a2cf914298a9658799e3e1f49cf7c8604f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f03fffc232f1b8a6305d6fa9076c1e1bca28e4d7666df088caa1812431cf9135
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1326929330825806,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.30995738086013175,
   "eval_steps": 50,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -359,6 +359,49 @@
       "eval_samples_per_second": 28.003,
       "eval_steps_per_second": 7.001,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -387,7 +430,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.0834807071113216e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.1311696767807007,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.3487020534676482,
   "eval_steps": 50,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 28.003,
       "eval_steps_per_second": 7.001,
       "step": 400
+    },
+    {
+      "epoch": 0.317706315381635,
+      "grad_norm": 1.2059953212738037,
+      "learning_rate": 1.9384775070942844e-05,
+      "loss": 0.8901,
+      "step": 410
+    },
+    {
+      "epoch": 0.32545524990313834,
+      "grad_norm": 1.4342598915100098,
+      "learning_rate": 1.5423118240122765e-05,
+      "loss": 1.1229,
+      "step": 420
+    },
+    {
+      "epoch": 0.3332041844246416,
+      "grad_norm": 1.244903802871704,
+      "learning_rate": 1.188081932481891e-05,
+      "loss": 1.1037,
+      "step": 430
+    },
+    {
+      "epoch": 0.3409531189461449,
+      "grad_norm": 1.8167920112609863,
+      "learning_rate": 8.775136049276001e-06,
+      "loss": 1.131,
+      "step": 440
+    },
+    {
+      "epoch": 0.3487020534676482,
+      "grad_norm": 2.944375991821289,
+      "learning_rate": 6.121198990230306e-06,
+      "loss": 1.0648,
+      "step": 450
+    },
+    {
+      "epoch": 0.3487020534676482,
+      "eval_loss": 1.1311696767807007,
+      "eval_runtime": 19.3838,
+      "eval_samples_per_second": 28.065,
+      "eval_steps_per_second": 7.016,
+      "step": 450
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.344858262647603e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null