alicegoesdown committed
Commit a8a18e9 · verified · 1 Parent(s): 4c8dba7

Training in progress, step 1200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:98f9852468be1d11f79a6afd8e8e8e90ab82b65b6630bfe27bf9d7258aa53760
+ oid sha256:1069acd7dbeb0a53054f21d4748045d3b0cd75d1c7844eea3e3c04cd84a1c1c6
  size 653434568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:46f5bcafb3391b66bf71dd49e132600d898d53858b6ef128a4473be5f14bbd54
+ oid sha256:ddffa107f91c19c05fdc252823136fe52818fe6d47b0afd3b51276eb3eaaf4c6
  size 1288533754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8baf1c7e69a5025f19d25ee5fab7f4ab7f55b412ae064b6fb683da98fc8d4be9
+ oid sha256:023bcf31226b6f20365e44211b51277c48818151bb707e207a8261688b6af12e
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9cf334b45c59ec3117af6235b0cb6b8da2aab8bbc78c388edc1a1925c6731983
+ oid sha256:9700e65a0b3380601a1015680e2c133feb2eaf2c92236b0244cca4087d160954
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 1.6903132200241089,
- "best_model_checkpoint": "./output/checkpoint-1050",
- "epoch": 0.0931016137613052,
+ "best_metric": 1.6847599744796753,
+ "best_model_checkpoint": "./output/checkpoint-1200",
+ "epoch": 0.10640184429863452,
  "eval_steps": 150,
- "global_step": 1050,
+ "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -798,6 +798,119 @@
  "eval_samples_per_second": 8.421,
  "eval_steps_per_second": 8.421,
  "step": 1050
+ },
+ {
+ "epoch": 0.09398829579712716,
+ "grad_norm": 6.51155948638916,
+ "learning_rate": 9.082436730845996e-05,
+ "loss": 1.5121,
+ "step": 1060
+ },
+ {
+ "epoch": 0.0948749778329491,
+ "grad_norm": 7.315041542053223,
+ "learning_rate": 9.063844375723016e-05,
+ "loss": 1.45,
+ "step": 1070
+ },
+ {
+ "epoch": 0.09576165986877105,
+ "grad_norm": 9.287749290466309,
+ "learning_rate": 9.045084971874741e-05,
+ "loss": 1.6892,
+ "step": 1080
+ },
+ {
+ "epoch": 0.09664834190459301,
+ "grad_norm": 7.6157097816467285,
+ "learning_rate": 9.026159290426783e-05,
+ "loss": 1.832,
+ "step": 1090
+ },
+ {
+ "epoch": 0.09753502394041497,
+ "grad_norm": 6.081124782562256,
+ "learning_rate": 9.007068109339786e-05,
+ "loss": 1.6911,
+ "step": 1100
+ },
+ {
+ "epoch": 0.09842170597623692,
+ "grad_norm": 7.2468671798706055,
+ "learning_rate": 8.987812213377425e-05,
+ "loss": 1.6959,
+ "step": 1110
+ },
+ {
+ "epoch": 0.09930838801205888,
+ "grad_norm": 7.454516887664795,
+ "learning_rate": 8.968392394074165e-05,
+ "loss": 1.5169,
+ "step": 1120
+ },
+ {
+ "epoch": 0.10019507004788084,
+ "grad_norm": 10.253645896911621,
+ "learning_rate": 8.948809449702714e-05,
+ "loss": 1.6779,
+ "step": 1130
+ },
+ {
+ "epoch": 0.10108175208370278,
+ "grad_norm": 8.075345993041992,
+ "learning_rate": 8.929064185241216e-05,
+ "loss": 1.6622,
+ "step": 1140
+ },
+ {
+ "epoch": 0.10196843411952473,
+ "grad_norm": 11.007535934448242,
+ "learning_rate": 8.909157412340152e-05,
+ "loss": 1.7568,
+ "step": 1150
+ },
+ {
+ "epoch": 0.10285511615534669,
+ "grad_norm": 8.019722938537598,
+ "learning_rate": 8.889089949288989e-05,
+ "loss": 1.6177,
+ "step": 1160
+ },
+ {
+ "epoch": 0.10374179819116865,
+ "grad_norm": 8.618474960327148,
+ "learning_rate": 8.868862620982537e-05,
+ "loss": 1.5605,
+ "step": 1170
+ },
+ {
+ "epoch": 0.1046284802269906,
+ "grad_norm": 8.008125305175781,
+ "learning_rate": 8.848476258887034e-05,
+ "loss": 1.5995,
+ "step": 1180
+ },
+ {
+ "epoch": 0.10551516226281256,
+ "grad_norm": 11.63944149017334,
+ "learning_rate": 8.827931701005976e-05,
+ "loss": 1.5778,
+ "step": 1190
+ },
+ {
+ "epoch": 0.10640184429863452,
+ "grad_norm": 9.485556602478027,
+ "learning_rate": 8.807229791845674e-05,
+ "loss": 1.547,
+ "step": 1200
+ },
+ {
+ "epoch": 0.10640184429863452,
+ "eval_loss": 1.6847599744796753,
+ "eval_runtime": 59.4403,
+ "eval_samples_per_second": 8.412,
+ "eval_steps_per_second": 8.412,
+ "step": 1200
  }
  ],
  "logging_steps": 10,
@@ -817,7 +930,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.53861980121088e+17,
+ "total_flos": 4.043639543550935e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null