Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c05c7280966610c518770ea427fea6a734df8d0e4a1ffe8d11b35ee87d2ac553
 size 323014168

 version https://git-lfs.github.com/spec/v1
+oid sha256:12fcc2c6875c4cb562fe03827cc336fa514ac4c04e7eb77ac7a78337c369b6e4
 size 323014168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27023b9307bbb8857582e814102746ce7fb0615482de67554b9e51e1d11fee1d
 size 646253418

 version https://git-lfs.github.com/spec/v1
+oid sha256:8045dbfc2ae7b8c916d96d5aebeaf8180d75d54a1b17d4dc099073996d0fde36
 size 646253418

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57c78f2bafa1a6ffa696ddd2638882b05354012c022a1add6f9d2419584a9b74
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e86801f225b41a90a5ff3a27a4cfdc797ffe2c344cc306dade1199afa95fa95b
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6beaf2b1b39920019d2cdaf74fd65fa168d0f3d379e84462da7b7b51581b9e0
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d89a3602a1002a639d34b3d1f2ef8045da54b0b6c888c61d11cc557e674bfd2b
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e95bcfa77028271b4a392a29a9b65075a4d40dcb0a7bd38544da317dbfc94c5
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:85e5a8a67e0882a34a98b7fccff08d132fd2f716f125214db7e1f2d4876fa860
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a372741b00540847787731274e4dcee35816467685a272503f5ef60245e3a830
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7d0c6f7b8adba6859188d0d0e0ed561cad961e8ebf25112816a842442fa5e97
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.04135577753186226,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.046718056528848396,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 27.18,
       "eval_steps_per_second": 7.067,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.850694305457111e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.015838705003261566,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.09343611305769679,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.18,
       "eval_steps_per_second": 7.067,
       "step": 25
+    },
+    {
+      "epoch": 0.04858677879000234,
+      "grad_norm": 0.5107179284095764,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 0.4535,
+      "step": 26
+    },
+    {
+      "epoch": 0.050455501051156273,
+      "grad_norm": 0.4900267422199249,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 0.0796,
+      "step": 27
+    },
+    {
+      "epoch": 0.05232422331231021,
+      "grad_norm": 0.3207160234451294,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 0.043,
+      "step": 28
+    },
+    {
+      "epoch": 0.054192945573464144,
+      "grad_norm": 0.5145649313926697,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 0.0424,
+      "step": 29
+    },
+    {
+      "epoch": 0.05606166783461808,
+      "grad_norm": 0.22890864312648773,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 0.0244,
+      "step": 30
+    },
+    {
+      "epoch": 0.057930390095772014,
+      "grad_norm": 0.23109875619411469,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 0.0231,
+      "step": 31
+    },
+    {
+      "epoch": 0.05979911235692595,
+      "grad_norm": 0.11645814031362534,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 0.0134,
+      "step": 32
+    },
+    {
+      "epoch": 0.06166783461807989,
+      "grad_norm": 0.18600647151470184,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 0.0153,
+      "step": 33
+    },
+    {
+      "epoch": 0.06353655687923382,
+      "grad_norm": 0.2838912904262543,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 0.0163,
+      "step": 34
+    },
+    {
+      "epoch": 0.06540527914038775,
+      "grad_norm": 0.24853822588920593,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 0.0185,
+      "step": 35
+    },
+    {
+      "epoch": 0.0672740014015417,
+      "grad_norm": 0.20572882890701294,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 0.0135,
+      "step": 36
+    },
+    {
+      "epoch": 0.06914272366269564,
+      "grad_norm": 0.22797401249408722,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 0.0179,
+      "step": 37
+    },
+    {
+      "epoch": 0.07101144592384957,
+      "grad_norm": 0.6385992765426636,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 0.3246,
+      "step": 38
+    },
+    {
+      "epoch": 0.07288016818500351,
+      "grad_norm": 0.25923556089401245,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 0.0883,
+      "step": 39
+    },
+    {
+      "epoch": 0.07474889044615744,
+      "grad_norm": 0.1639515459537506,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 0.0275,
+      "step": 40
+    },
+    {
+      "epoch": 0.07661761270731138,
+      "grad_norm": 0.1730871945619583,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 0.0234,
+      "step": 41
+    },
+    {
+      "epoch": 0.07848633496846531,
+      "grad_norm": 0.23780955374240875,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 0.0164,
+      "step": 42
+    },
+    {
+      "epoch": 0.08035505722961925,
+      "grad_norm": 0.14500492811203003,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 0.013,
+      "step": 43
+    },
+    {
+      "epoch": 0.08222377949077318,
+      "grad_norm": 0.18985842168331146,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 0.0164,
+      "step": 44
+    },
+    {
+      "epoch": 0.08409250175192712,
+      "grad_norm": 0.11164896190166473,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 0.01,
+      "step": 45
+    },
+    {
+      "epoch": 0.08596122401308105,
+      "grad_norm": 0.11438169330358505,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 0.0078,
+      "step": 46
+    },
+    {
+      "epoch": 0.08782994627423499,
+      "grad_norm": 0.17679692804813385,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 0.0074,
+      "step": 47
+    },
+    {
+      "epoch": 0.08969866853538892,
+      "grad_norm": 0.12496514618396759,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 0.0074,
+      "step": 48
+    },
+    {
+      "epoch": 0.09156739079654286,
+      "grad_norm": 0.1692323088645935,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 0.009,
+      "step": 49
+    },
+    {
+      "epoch": 0.09343611305769679,
+      "grad_norm": 0.1172904223203659,
+      "learning_rate": 1e-05,
+      "loss": 0.0082,
+      "step": 50
+    },
+    {
+      "epoch": 0.09343611305769679,
+      "eval_loss": 0.015838705003261566,
+      "eval_runtime": 1.8329,
+      "eval_samples_per_second": 27.28,
+      "eval_steps_per_second": 7.093,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.6943585332822016e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null