Training in progress, step 90, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbd404840ef415cf6dfbf046dbef271613723f868b09920ca116fc5511344b15
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a78d831bede7dd40e477fa7694ee5041a6c2a1c4dd47884d69631517e3003db
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e065f8f89d9eb270a188e83eb48303f6f781a74e0a31f37facb267b939c7c81
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:74f814dbc6958de56222e4762ab077120e81b798376d576addd94b33fb7cc3bf
 size 671466706

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a692e3cef582da2efc9bc07079ad5189a13e2ca769346c59ca73b10945b32cb9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea1a5d23166df3ab9a48a77c1526bd281c12abab800d03cded5eed551cd4bb0c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01a4e7013bdf0cfef44b18348c4082377d680816016af2638e8995bf0bb3c8d9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf6a9a1e39c0655c6941309388d2a028b5b8dbbca031ca2500bdfcdc9f2c01aa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.985263157894737,
   "eval_steps": 25,
-  "global_step": 88,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -655,6 +655,20 @@
       "learning_rate": 4.025706004760932e-07,
       "loss": 2.6286,
       "step": 88
     }
   ],
   "logging_steps": 1,
@@ -669,12 +683,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.050382716647768e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.0631578947368423,
   "eval_steps": 25,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.025706004760932e-07,
       "loss": 2.6286,
       "step": 88
+    },
+    {
+      "epoch": 3.0294736842105263,
+      "grad_norm": 0.3721281886100769,
+      "learning_rate": 1.0069334586854107e-07,
+      "loss": 2.1659,
+      "step": 89
+    },
+    {
+      "epoch": 3.0631578947368423,
+      "grad_norm": 0.2612936794757843,
+      "learning_rate": 0.0,
+      "loss": 1.8654,
+      "step": 90
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0742550511170355e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null