Training in progress, step 39, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8e90f8b2d9efe89ecf6005b90cee9f2ff75ee0ae4bad05048bf3b5ed194c681
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:50eaa409ee5f0a93f2c540fbca54989c5aefa21b1273c5d5b1d886ba09a20469
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f41172cfd16f5eea55c82a17063631317baf49c5f4d5ecc40f7960c2172a8ef5
 size 85723284

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef68d332673955da9d5dae7c171321f7a76837999c66b0e34d0f03757d7a5890
 size 85723284

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e97b7009911df3c6dbe7894b6ee9da598c21826b98bea0a9fff09e958541373f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:84783094152d26fbafe146a53398686233c3cc9dfb087efb955ba19f4d58f158
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e478bf29fddd2bf8f7498cec7a413de2c2afd2062f64a9f38a4ef3c5a20e6a2d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:aae2c39cb89433419e485dd995b7c5858a698c39b6628327b4757d3c15aab5bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06863553868998515,
   "eval_steps": 13,
-  "global_step": 26,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -213,6 +213,105 @@
       "eval_samples_per_second": 5.568,
       "eval_steps_per_second": 2.786,
       "step": 26
     }
   ],
   "logging_steps": 1,
@@ -232,7 +331,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.086251099611464e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.10295330803497772,
   "eval_steps": 13,
+  "global_step": 39,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.568,
       "eval_steps_per_second": 2.786,
       "step": 26
+    },
+    {
+      "epoch": 0.07127536710113842,
+      "grad_norm": 3.462208769633435e-05,
+      "learning_rate": 0.0001401895306154785,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 0.0739151955122917,
+      "grad_norm": 2.7388192393118516e-05,
+      "learning_rate": 0.00013042107116699228,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 0.07655502392344497,
+      "grad_norm": 3.9361602830467746e-05,
+      "learning_rate": 0.00012073645169758076,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 0.07919485233459825,
+      "grad_norm": 2.9102855478413403e-05,
+      "learning_rate": 0.00011117714323462186,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.08183468074575152,
+      "grad_norm": 2.171610321966e-05,
+      "learning_rate": 0.00010178408020452579,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 0.0844745091569048,
+      "grad_norm": 2.040547587967012e-05,
+      "learning_rate": 9.259748514523653e-05,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 0.08711433756805807,
+      "grad_norm": 1.634558975638356e-05,
+      "learning_rate": 8.365669646714983e-05,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 0.08975416597921135,
+      "grad_norm": 2.2022310076863505e-05,
+      "learning_rate": 7.500000000000002e-05,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 0.09239399439036462,
+      "grad_norm": 1.6614567357464693e-05,
+      "learning_rate": 6.66644650470597e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.0950338228015179,
+      "grad_norm": 1.5442792573594488e-05,
+      "learning_rate": 5.8685785648691894e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 0.09767365121267117,
+      "grad_norm": 1.3897730241296813e-05,
+      "learning_rate": 5.109812773498967e-05,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 0.10031347962382445,
+      "grad_norm": 1.476151192036923e-05,
+      "learning_rate": 4.3933982822017876e-05,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 0.10295330803497772,
+      "grad_norm": 1.7724401914165355e-05,
+      "learning_rate": 3.72240288781534e-05,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 0.10295330803497772,
+      "eval_loss": 1.1696874935296364e-07,
+      "eval_runtime": 229.2689,
+      "eval_samples_per_second": 5.57,
+      "eval_steps_per_second": 2.787,
+      "step": 39
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.6293766494171955e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null