Training in progress, step 208, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b63e87e68bd6b0e7db190c08a005cbe1b19645de1654d4a6dbf7a6ef2dfcb4d
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:33d1f902086bf162f42a891580c1d8f009b186a1b247fa2e5d0f3c8b552ca438
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3baa6c474e91338e28d2c11997d97f2255b26f7552ed683357ff6a193f69797
 size 240728084

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffbe6c82a241eec13776bec8b0245f431cae9a909fe4fa531b5a99f34a39e259
 size 240728084

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:089115101bfed8297ba3fb18cc84d56ea340bae11356e34eac025d2beac1caf3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fb5768ef05fd83332549c08f206d2683f104437b7d63ada2cc0d97372b46d74
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01241304905619208,
   "eval_steps": 500,
-  "global_step": 192,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1351,6 +1351,118 @@
       "learning_rate": 9.999855928766113e-06,
       "loss": 1.4314,
       "step": 192
     }
   ],
   "logging_steps": 1,
@@ -1370,7 +1482,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.232522107274281e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.013447469810874753,
   "eval_steps": 500,
+  "global_step": 208,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.999855928766113e-06,
       "loss": 1.4314,
       "step": 192
+    },
+    {
+      "epoch": 0.012477700353359746,
+      "grad_norm": 3.9474074840545654,
+      "learning_rate": 9.99984803593353e-06,
+      "loss": 1.4435,
+      "step": 193
+    },
+    {
+      "epoch": 0.012542351650527413,
+      "grad_norm": 4.373626232147217,
+      "learning_rate": 9.999839932629732e-06,
+      "loss": 1.3644,
+      "step": 194
+    },
+    {
+      "epoch": 0.01260700294769508,
+      "grad_norm": 4.185675621032715,
+      "learning_rate": 9.999831618855058e-06,
+      "loss": 1.3399,
+      "step": 195
+    },
+    {
+      "epoch": 0.012671654244862747,
+      "grad_norm": 4.8992109298706055,
+      "learning_rate": 9.999823094609862e-06,
+      "loss": 1.2623,
+      "step": 196
+    },
+    {
+      "epoch": 0.012736305542030415,
+      "grad_norm": 4.317060470581055,
+      "learning_rate": 9.999814359894501e-06,
+      "loss": 1.5297,
+      "step": 197
+    },
+    {
+      "epoch": 0.012800956839198081,
+      "grad_norm": 4.501911640167236,
+      "learning_rate": 9.999805414709344e-06,
+      "loss": 1.4305,
+      "step": 198
+    },
+    {
+      "epoch": 0.01286560813636575,
+      "grad_norm": 4.288606643676758,
+      "learning_rate": 9.999796259054765e-06,
+      "loss": 1.4358,
+      "step": 199
+    },
+    {
+      "epoch": 0.012930259433533415,
+      "grad_norm": 4.692774772644043,
+      "learning_rate": 9.99978689293115e-06,
+      "loss": 1.4448,
+      "step": 200
+    },
+    {
+      "epoch": 0.012994910730701083,
+      "grad_norm": 4.893410682678223,
+      "learning_rate": 9.999777316338897e-06,
+      "loss": 1.4313,
+      "step": 201
+    },
+    {
+      "epoch": 0.01305956202786875,
+      "grad_norm": 4.01968240737915,
+      "learning_rate": 9.999767529278403e-06,
+      "loss": 1.3831,
+      "step": 202
+    },
+    {
+      "epoch": 0.013124213325036417,
+      "grad_norm": 4.3122076988220215,
+      "learning_rate": 9.999757531750086e-06,
+      "loss": 1.3605,
+      "step": 203
+    },
+    {
+      "epoch": 0.013188864622204083,
+      "grad_norm": 3.9625604152679443,
+      "learning_rate": 9.999747323754363e-06,
+      "loss": 1.2944,
+      "step": 204
+    },
+    {
+      "epoch": 0.013253515919371751,
+      "grad_norm": 4.135870456695557,
+      "learning_rate": 9.999736905291664e-06,
+      "loss": 1.3465,
+      "step": 205
+    },
+    {
+      "epoch": 0.013318167216539419,
+      "grad_norm": 3.485560655593872,
+      "learning_rate": 9.999726276362429e-06,
+      "loss": 1.4901,
+      "step": 206
+    },
+    {
+      "epoch": 0.013382818513707085,
+      "grad_norm": 4.223531246185303,
+      "learning_rate": 9.999715436967104e-06,
+      "loss": 1.4342,
+      "step": 207
+    },
+    {
+      "epoch": 0.013447469810874753,
+      "grad_norm": 4.688872814178467,
+      "learning_rate": 9.999704387106147e-06,
+      "loss": 1.3735,
+      "step": 208
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.334557816141824e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null