Training in progress, step 256, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55f63548d61d6c65a1654bc4bf0a1b62fc84fb53fb4d0f3eb1a55325d89267bb
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:d40101cb3554c5d0ee9abbb345dec3e03351f3cc8ceca5279467894c1cd29246
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df0b473c11c1b6b133d909acb6c86cf9a832deb9c2a2162cac061c32e7b84d08
-size 240728084

 version https://git-lfs.github.com/spec/v1
+oid sha256:2552fd74ef5bd48e004d8ae63a0c71c30b9b81b8546aca6ef459461e383a2778
+size 240728404

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:703f8b43f2697cc055bff9862430a4543fd4d2968318f68935d73a18b734e1eb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b9365a698c9eb8b230edf4bfa8724f76b1a22ec8cb4f788c7cb4ab8bca424d5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.015516311320240098,
   "eval_steps": 500,
-  "global_step": 240,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1687,6 +1687,118 @@
       "learning_rate": 9.999239668895627e-06,
       "loss": 1.4116,
       "step": 240
     }
   ],
   "logging_steps": 1,
@@ -1706,7 +1818,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.536003023152988e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.01655073207492277,
   "eval_steps": 500,
+  "global_step": 256,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.999239668895627e-06,
       "loss": 1.4116,
       "step": 240
+    },
+    {
+      "epoch": 0.015580962617407766,
+      "grad_norm": 4.260445594787598,
+      "learning_rate": 9.999221673982747e-06,
+      "loss": 1.3179,
+      "step": 241
+    },
+    {
+      "epoch": 0.015645613914575432,
+      "grad_norm": 4.778342247009277,
+      "learning_rate": 9.999203468625017e-06,
+      "loss": 1.3185,
+      "step": 242
+    },
+    {
+      "epoch": 0.0157102652117431,
+      "grad_norm": 3.723858594894409,
+      "learning_rate": 9.999185052823207e-06,
+      "loss": 1.3,
+      "step": 243
+    },
+    {
+      "epoch": 0.015774916508910768,
+      "grad_norm": 3.748918294906616,
+      "learning_rate": 9.99916642657809e-06,
+      "loss": 1.455,
+      "step": 244
+    },
+    {
+      "epoch": 0.015839567806078435,
+      "grad_norm": 4.436662197113037,
+      "learning_rate": 9.999147589890452e-06,
+      "loss": 1.3895,
+      "step": 245
+    },
+    {
+      "epoch": 0.0159042191032461,
+      "grad_norm": 4.519418716430664,
+      "learning_rate": 9.999128542761085e-06,
+      "loss": 1.2948,
+      "step": 246
+    },
+    {
+      "epoch": 0.015968870400413768,
+      "grad_norm": 4.407564640045166,
+      "learning_rate": 9.99910928519079e-06,
+      "loss": 1.5275,
+      "step": 247
+    },
+    {
+      "epoch": 0.016033521697581436,
+      "grad_norm": 4.254813194274902,
+      "learning_rate": 9.999089817180378e-06,
+      "loss": 1.3428,
+      "step": 248
+    },
+    {
+      "epoch": 0.016098172994749103,
+      "grad_norm": 4.610138893127441,
+      "learning_rate": 9.999070138730668e-06,
+      "loss": 1.3733,
+      "step": 249
+    },
+    {
+      "epoch": 0.01616282429191677,
+      "grad_norm": 3.6939423084259033,
+      "learning_rate": 9.99905024984249e-06,
+      "loss": 1.3943,
+      "step": 250
+    },
+    {
+      "epoch": 0.016227475589084436,
+      "grad_norm": 3.755028247833252,
+      "learning_rate": 9.999030150516681e-06,
+      "loss": 1.4256,
+      "step": 251
+    },
+    {
+      "epoch": 0.016292126886252103,
+      "grad_norm": 4.2649149894714355,
+      "learning_rate": 9.999009840754085e-06,
+      "loss": 1.4257,
+      "step": 252
+    },
+    {
+      "epoch": 0.01635677818341977,
+      "grad_norm": 3.718479633331299,
+      "learning_rate": 9.998989320555562e-06,
+      "loss": 1.3312,
+      "step": 253
+    },
+    {
+      "epoch": 0.01642142948058744,
+      "grad_norm": 3.7253224849700928,
+      "learning_rate": 9.998968589921969e-06,
+      "loss": 1.37,
+      "step": 254
+    },
+    {
+      "epoch": 0.016486080777755104,
+      "grad_norm": 3.8125829696655273,
+      "learning_rate": 9.998947648854182e-06,
+      "loss": 1.3721,
+      "step": 255
+    },
+    {
+      "epoch": 0.01655073207492277,
+      "grad_norm": 4.105193138122559,
+      "learning_rate": 9.998926497353084e-06,
+      "loss": 1.3238,
+      "step": 256
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.637374085936087e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null