abaddon182 committed (verified)
Commit 727969d
1 Parent(s): 757c5ed

Training in progress, step 450, checkpoint

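The files touched below are the standard contents of a Hugging Face transformers Trainer checkpoint: the adapter weights (adapter_model.safetensors), optimizer and learning-rate-scheduler state, RNG state, and trainer_state.json. As a rough illustration only, the sketch below loads the adapter onto a base model with PEFT; the base model name and the causal-LM head class are placeholders, since neither is recorded in this commit.

# A hedged sketch, not the author's own code: assumes this is a PEFT adapter checkpoint
# (adapter_model.safetensors follows PEFT's naming) and that the full last-checkpoint/
# directory, including its adapter_config.json, has been downloaded locally.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("org/base-model")  # placeholder; the base model is not named in this commit
model = PeftModel.from_pretrained(base, "last-checkpoint")     # attaches the adapter weights to the base model
model.eval()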
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac54e7a7ee44983b14f6e81c3224088fde6e918a9548ecf4eb99f1f4386e6e75
+oid sha256:dba3a9c8daa4bdd1e58028a57631ab9c6677819c890282b6e771c9f4e453820e
 size 191968
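This file, like the other checkpoint files below, is stored through Git LFS, so the diff only swaps the pointer's object id (the SHA-256 of the new file contents); the recorded size is unchanged. Below is a minimal sketch for checking a downloaded object against its pointer, assuming both are available locally; the example paths are assumptions, not part of this commit.

import hashlib

def read_pointer(pointer_path):
    # Parse the "oid sha256:<hex>" and "size <bytes>" lines of a Git LFS pointer file.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            if " " in line:
                key, value = line.strip().split(" ", 1)
                fields[key] = value
    return fields["oid"].split(":", 1)[1], int(fields["size"])

def matches_pointer(pointer_path, object_path):
    # True if the resolved file's SHA-256 and byte size match the pointer's oid and size.
    expected_oid, expected_size = read_pointer(pointer_path)
    digest = hashlib.sha256()
    total = 0
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest() == expected_oid and total == expected_size

# Example call (paths are illustrative):
# matches_pointer("pointer.txt", "last-checkpoint/adapter_model.safetensors")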
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5328de3a04156e2ffdb90d4af3fad8b222382888acdbd4f3bd35e0f570ba23a
+oid sha256:b77f30fc7d4d2a0f3c13953f1483910cb776aeb74503dc67517e33f2a8699ffa
 size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eaf84182c5327cba99ea00da4d1ce9b4d9102b3389d1bc82bb43ffb6c8949bb6
+oid sha256:6e085fde017ec5c64b8d00529e5946a3fdd2cc2b597c7b3cf7373c02ad0c4c8b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d79de3c6a6d4b735b3fef56d029a72688ed64900a10e70511282f31905a33cc
+oid sha256:bb2b145cc02354c01563cd3053c6b3f03d7f93c87dfd6b3852b83f2c8fa5f1fd
 size 1064
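The trainer_state.json diff below logs training steps 310 through 450 and records a new best eval loss of about 10.3165 at checkpoint-450 (previously about 10.3195 at checkpoint-300). A minimal sketch for summarizing a downloaded copy of that file; the local path is an assumption.

import json

# Path is an assumption: point it at a local copy of this commit's trainer_state.json.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"step {state['global_step']} (epoch {state['epoch']:.4f})")
print(f"best eval loss {state['best_metric']} at {state['best_model_checkpoint']}")

# Most recent entries: training steps carry "loss", evaluation entries carry "eval_loss".
for entry in state["log_history"][-5:]:
    if "loss" in entry:
        print(f"  step {entry['step']}: loss={entry['loss']} lr={entry['learning_rate']}")
    elif "eval_loss" in entry:
        print(f"  step {entry['step']}: eval_loss={entry['eval_loss']}")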
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.319465637207031,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.5486968449931413,
+  "best_metric": 10.3164701461792,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.823045267489712,
   "eval_steps": 150,
-  "global_step": 300,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -241,6 +241,119 @@
       "eval_samples_per_second": 651.966,
       "eval_steps_per_second": 163.522,
       "step": 300
+    },
+    {
+      "epoch": 0.566986739826246,
+      "grad_norm": 0.07304774969816208,
+      "learning_rate": 4.63685608183771e-05,
+      "loss": 10.3357,
+      "step": 310
+    },
+    {
+      "epoch": 0.5852766346593508,
+      "grad_norm": 0.0741581916809082,
+      "learning_rate": 4.322570304519023e-05,
+      "loss": 10.3264,
+      "step": 320
+    },
+    {
+      "epoch": 0.6035665294924554,
+      "grad_norm": 0.07260976731777191,
+      "learning_rate": 4.0109903950724134e-05,
+      "loss": 10.3205,
+      "step": 330
+    },
+    {
+      "epoch": 0.6218564243255601,
+      "grad_norm": 0.0811510682106018,
+      "learning_rate": 3.7033609019317374e-05,
+      "loss": 10.319,
+      "step": 340
+    },
+    {
+      "epoch": 0.6401463191586648,
+      "grad_norm": 0.24966461956501007,
+      "learning_rate": 3.400910594322121e-05,
+      "loss": 10.3137,
+      "step": 350
+    },
+    {
+      "epoch": 0.6584362139917695,
+      "grad_norm": 0.07861746102571487,
+      "learning_rate": 3.104847554168106e-05,
+      "loss": 10.3348,
+      "step": 360
+    },
+    {
+      "epoch": 0.6767261088248743,
+      "grad_norm": 0.06537245959043503,
+      "learning_rate": 2.816354350633411e-05,
+      "loss": 10.3262,
+      "step": 370
+    },
+    {
+      "epoch": 0.695016003657979,
+      "grad_norm": 0.08251766115427017,
+      "learning_rate": 2.5365833165666946e-05,
+      "loss": 10.3195,
+      "step": 380
+    },
+    {
+      "epoch": 0.7133058984910837,
+      "grad_norm": 0.10933877527713776,
+      "learning_rate": 2.266651945720694e-05,
+      "loss": 10.317,
+      "step": 390
+    },
+    {
+      "epoch": 0.7315957933241883,
+      "grad_norm": 0.20270827412605286,
+      "learning_rate": 2.0076384291297134e-05,
+      "loss": 10.3157,
+      "step": 400
+    },
+    {
+      "epoch": 0.7498856881572931,
+      "grad_norm": 0.07105053216218948,
+      "learning_rate": 1.7605773484745547e-05,
+      "loss": 10.3334,
+      "step": 410
+    },
+    {
+      "epoch": 0.7681755829903978,
+      "grad_norm": 0.07826591283082962,
+      "learning_rate": 1.5264555436369744e-05,
+      "loss": 10.3251,
+      "step": 420
+    },
+    {
+      "epoch": 0.7864654778235025,
+      "grad_norm": 0.07811526954174042,
+      "learning_rate": 1.3062081709499303e-05,
+      "loss": 10.3205,
+      "step": 430
+    },
+    {
+      "epoch": 0.8047553726566072,
+      "grad_norm": 0.08905858546495438,
+      "learning_rate": 1.1007149678882329e-05,
+      "loss": 10.319,
+      "step": 440
+    },
+    {
+      "epoch": 0.823045267489712,
+      "grad_norm": 0.24548448622226715,
+      "learning_rate": 9.107967391195903e-06,
+      "loss": 10.315,
+      "step": 450
+    },
+    {
+      "epoch": 0.823045267489712,
+      "eval_loss": 10.3164701461792,
+      "eval_runtime": 1.4196,
+      "eval_samples_per_second": 648.792,
+      "eval_steps_per_second": 162.726,
+      "step": 450
     }
   ],
   "logging_steps": 10,
@@ -269,7 +382,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 17360071557120.0,
+  "total_flos": 26019191586816.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null