besimray committed (verified)
Commit 453684e · Parent(s): a8e20e0

Training in progress, step 150, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee338dc939b7c042fee22bc3023129715453e09ab7503d0fbab68f2a9d9e7429
+ oid sha256:8f9922b08d6d27775f70906696e738967a9dfda26360726a77d0336c22db2a94
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1685e33900566f466c94654c0999c9110defe07d1d83c288d3920ab9d12b2a82
+ oid sha256:5ac6daf0448867278648636f54dadb2e9c3630816a32c6d44b1a905f09e6668e
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:07dfcab91d136e6f3877a1517e4a4558c6ab33a709be94712f36072861ae0974
+ oid sha256:917aa8551bb3ce136b5c3a60b81a06542b98442edf18388bba6bc0c0d0ada806
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c049ad9892b8ae242eb26f06a6af3edec6b865f6613ddc97103e21f4231f6420
+ oid sha256:b8fa6a7a6709edeb55cdf23229934c07be2d8aae0e4056fbdb6ff2482d0eb3d3
  size 1064
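
Each of the four binary files above is stored as a Git LFS pointer (a version line, a sha256 OID, and a byte size), so only the OID line changes between checkpoints while the sizes stay fixed. Below is a minimal sketch of how one might verify a downloaded payload against such a pointer; the file paths and helper names are illustrative assumptions, not tooling from this repository.

```python
# Minimal sketch: check a downloaded checkpoint file against the sha256 OID
# and size recorded in its Git LFS pointer (three-line layout as shown above).
# Paths below are hypothetical, for illustration only.
import hashlib
from pathlib import Path


def read_lfs_pointer(pointer_path: str) -> dict:
    """Parse the pointer lines: 'version ...', 'oid sha256:<hex>', 'size <bytes>'."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def verify(pointer_path: str, payload_path: str) -> bool:
    pointer = read_lfs_pointer(pointer_path)
    expected_oid = pointer["oid"].split(":", 1)[1]
    expected_size = int(pointer["size"])
    data = Path(payload_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid


# Example (hypothetical local paths):
# verify("adapter_model.safetensors.pointer", "adapter_model.safetensors")
```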
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.026728630065918,
-  "best_model_checkpoint": "miner_id_24/checkpoint-140",
-  "epoch": 0.006327540620550948,
+  "best_metric": 11.025545120239258,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.006779507807733159,
   "eval_steps": 5,
-  "global_step": 140,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1219,6 +1219,92 @@
   "eval_samples_per_second": 52.878,
   "eval_steps_per_second": 26.442,
   "step": 140
+  },
+  {
+   "epoch": 0.006372737339269169,
+   "grad_norm": 0.47628021240234375,
+   "learning_rate": 0.0001667540876547148,
+   "loss": 44.1197,
+   "step": 141
+  },
+  {
+   "epoch": 0.00641793405798739,
+   "grad_norm": 0.4244273006916046,
+   "learning_rate": 0.0001662753422842123,
+   "loss": 44.0529,
+   "step": 142
+  },
+  {
+   "epoch": 0.006463130776705611,
+   "grad_norm": 0.4019363820552826,
+   "learning_rate": 0.00016579387259397127,
+   "loss": 44.107,
+   "step": 143
+  },
+  {
+   "epoch": 0.0065083274954238325,
+   "grad_norm": 0.41666439175605774,
+   "learning_rate": 0.00016530969837532487,
+   "loss": 44.1185,
+   "step": 144
+  },
+  {
+   "epoch": 0.006553524214142053,
+   "grad_norm": 0.52204829454422,
+   "learning_rate": 0.00016482283953077887,
+   "loss": 44.0868,
+   "step": 145
+  },
+  {
+   "epoch": 0.006553524214142053,
+   "eval_loss": 11.026100158691406,
+   "eval_runtime": 175.9985,
+   "eval_samples_per_second": 52.938,
+   "eval_steps_per_second": 26.472,
+   "step": 145
+  },
+  {
+   "epoch": 0.006598720932860274,
+   "grad_norm": 0.4917082190513611,
+   "learning_rate": 0.00016433331607319343,
+   "loss": 44.0786,
+   "step": 146
+  },
+  {
+   "epoch": 0.006643917651578495,
+   "grad_norm": 0.6054917573928833,
+   "learning_rate": 0.00016384114812496056,
+   "loss": 44.0952,
+   "step": 147
+  },
+  {
+   "epoch": 0.006689114370296716,
+   "grad_norm": 0.46359196305274963,
+   "learning_rate": 0.00016334635591717703,
+   "loss": 44.1401,
+   "step": 148
+  },
+  {
+   "epoch": 0.0067343110890149376,
+   "grad_norm": 0.5335073471069336,
+   "learning_rate": 0.00016284895978881236,
+   "loss": 44.0664,
+   "step": 149
+  },
+  {
+   "epoch": 0.006779507807733159,
+   "grad_norm": 0.3754950761795044,
+   "learning_rate": 0.00016234898018587337,
+   "loss": 44.1361,
+   "step": 150
+  },
+  {
+   "epoch": 0.006779507807733159,
+   "eval_loss": 11.025545120239258,
+   "eval_runtime": 176.2544,
+   "eval_samples_per_second": 52.861,
+   "eval_steps_per_second": 26.433,
+   "step": 150
   }
   ],
   "logging_steps": 1,
@@ -1247,7 +1333,7 @@
   "attributes": {}
   }
   },
-  "total_flos": 1470942412800.0,
+  "total_flos": 1576009728000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null