Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0687e4d8f6cf89721ab78bed53eb4ae977447da0fc9fb2448416b6e5665e6a19
 size 550593184

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ba6eb0915706a051b7bd0e910906188d291e5bfdc20358c4fd0a75e63479029
 size 550593184

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5656d36cb98eb6eada2688053a12fd7a297ab77ae12b8b6b58b3097a87bb0451
 size 1101572914

 version https://git-lfs.github.com/spec/v1
+oid sha256:a125e5e23ae60444995a9ee989940339caa787478d66cac70533fdaee0174d5b
 size 1101572914

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59cca1a27673a63ef348b90440de964c83b6c854a2b1816f2135d0e1c33d8623
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:28278fc2af2fbd2fb25f9ac77a25b969c286b6ad3d05b1457f9d59beafd0efb8
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e627200a79825f01d5d7b4bad559d98d1c0c811aab2f915ade0c9dc457304690
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:be5f8d8d0588cf228e0be2b0fe9262ab6bfa2747f9686c0aacf43d774b7dd160
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5d82625809dec95d45e537c310889b19d489fcd9312d2a3a912203be3971213
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b75f843464c11d1a10eae20c5f15438755394d6eee83924b39025bbe08de370
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f66a7a1406643645b9e0799b8f56489062d3f257b63a064af8a3be9b316bd46
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae04c3559d1f3db66ca24c80b5e84d5aa2c00efe4ccee0163f042d2a4dbe8503
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.03939060494303703,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.25157232704402516,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 21.715,
       "eval_steps_per_second": 2.723,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.555785010511872e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.022800911217927933,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.5031446540880503,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.715,
       "eval_steps_per_second": 2.723,
       "step": 25
+    },
+    {
+      "epoch": 0.26163522012578616,
+      "grad_norm": 0.33435148000717163,
+      "learning_rate": 5e-05,
+      "loss": 0.0727,
+      "step": 26
+    },
+    {
+      "epoch": 0.27169811320754716,
+      "grad_norm": 0.32047706842422485,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.0474,
+      "step": 27
+    },
+    {
+      "epoch": 0.28176100628930817,
+      "grad_norm": 0.082615427672863,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.0325,
+      "step": 28
+    },
+    {
+      "epoch": 0.2918238993710692,
+      "grad_norm": 0.38494592905044556,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.0357,
+      "step": 29
+    },
+    {
+      "epoch": 0.3018867924528302,
+      "grad_norm": 0.32642385363578796,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.0315,
+      "step": 30
+    },
+    {
+      "epoch": 0.3119496855345912,
+      "grad_norm": 0.1752299666404724,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.0231,
+      "step": 31
+    },
+    {
+      "epoch": 0.3220125786163522,
+      "grad_norm": 0.31406933069229126,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.0218,
+      "step": 32
+    },
+    {
+      "epoch": 0.3320754716981132,
+      "grad_norm": 0.1449318379163742,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.0171,
+      "step": 33
+    },
+    {
+      "epoch": 0.3421383647798742,
+      "grad_norm": 0.0971163660287857,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.0179,
+      "step": 34
+    },
+    {
+      "epoch": 0.3522012578616352,
+      "grad_norm": 0.06397205591201782,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.0136,
+      "step": 35
+    },
+    {
+      "epoch": 0.3622641509433962,
+      "grad_norm": 0.09287846088409424,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.0155,
+      "step": 36
+    },
+    {
+      "epoch": 0.3723270440251572,
+      "grad_norm": 0.11693109571933746,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.0119,
+      "step": 37
+    },
+    {
+      "epoch": 0.38238993710691827,
+      "grad_norm": 0.207436203956604,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.059,
+      "step": 38
+    },
+    {
+      "epoch": 0.39245283018867927,
+      "grad_norm": 0.23645782470703125,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.0438,
+      "step": 39
+    },
+    {
+      "epoch": 0.4025157232704403,
+      "grad_norm": 0.2462545931339264,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.039,
+      "step": 40
+    },
+    {
+      "epoch": 0.4125786163522013,
+      "grad_norm": 0.1835210770368576,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.0284,
+      "step": 41
+    },
+    {
+      "epoch": 0.4226415094339623,
+      "grad_norm": 0.16804800927639008,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.0279,
+      "step": 42
+    },
+    {
+      "epoch": 0.4327044025157233,
+      "grad_norm": 0.047770529985427856,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.0179,
+      "step": 43
+    },
+    {
+      "epoch": 0.4427672955974843,
+      "grad_norm": 0.05572060868144035,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.0162,
+      "step": 44
+    },
+    {
+      "epoch": 0.4528301886792453,
+      "grad_norm": 0.04772639647126198,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.0148,
+      "step": 45
+    },
+    {
+      "epoch": 0.4628930817610063,
+      "grad_norm": 0.05174249783158302,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.0129,
+      "step": 46
+    },
+    {
+      "epoch": 0.4729559748427673,
+      "grad_norm": 0.046342507004737854,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.0095,
+      "step": 47
+    },
+    {
+      "epoch": 0.4830188679245283,
+      "grad_norm": 0.05481298267841339,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.012,
+      "step": 48
+    },
+    {
+      "epoch": 0.4930817610062893,
+      "grad_norm": 0.04954361915588379,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.0139,
+      "step": 49
+    },
+    {
+      "epoch": 0.5031446540880503,
+      "grad_norm": 0.09124169498682022,
+      "learning_rate": 0.0,
+      "loss": 0.0171,
+      "step": 50
+    },
+    {
+      "epoch": 0.5031446540880503,
+      "eval_loss": 0.022800911217927933,
+      "eval_runtime": 30.727,
+      "eval_samples_per_second": 21.805,
+      "eval_steps_per_second": 2.734,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1111570021023744e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null