auxyus commited on
Commit
89ba324
·
verified ·
1 Parent(s): ea301dd

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab9a5df5e1c8060d8373b14a51b4f8888fd97811538de37ef80f3d16fb51073
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72d46c8fe0234ee75f91ec5fa90676188c422627c6ce38b838cd66c10a17fe93
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:217094565747c2b9ed586bc5c7228df1f82c6b477100624d9c248dbb5b158445
3
  size 85723732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b644d70458c120b462aad650a3b75789fda13a3765010c9382f03c1857faf3
3
  size 85723732
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a748b8dddefa8c3474c694e234ef459c2d59739da98f7097037d44c1f8667fb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6c4fb787dda3b5af96393eea29fe45a41c6f0fda1167422e0eed8251ea6c4d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26b811a0ecde9d77841cf06335777c47657d93b172b0c899d014ac2d55b60923
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:110b2f0ce7be25c09ed998ed1965f38a8d3a448ca6aa07e3d4392461b80d705f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7165031433105469,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1500",
4
- "epoch": 0.1881145788798632,
5
  "eval_steps": 150,
6
- "global_step": 1650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1258,6 +1258,119 @@
1258
  "eval_samples_per_second": 24.851,
1259
  "eval_steps_per_second": 6.214,
1260
  "step": 1650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1261
  }
1262
  ],
1263
  "logging_steps": 10,
@@ -1272,7 +1385,7 @@
1272
  "early_stopping_threshold": 0.0
1273
  },
1274
  "attributes": {
1275
- "early_stopping_patience_counter": 1
1276
  }
1277
  },
1278
  "TrainerControl": {
@@ -1281,12 +1394,12 @@
1281
  "should_evaluate": false,
1282
  "should_log": false,
1283
  "should_save": true,
1284
- "should_training_stop": false
1285
  },
1286
  "attributes": {}
1287
  }
1288
  },
1289
- "total_flos": 5.800825007898624e+17,
1290
  "train_batch_size": 4,
1291
  "trial_name": null,
1292
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7165031433105469,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1500",
4
+ "epoch": 0.20521590423257802,
5
  "eval_steps": 150,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1258
  "eval_samples_per_second": 24.851,
1259
  "eval_steps_per_second": 6.214,
1260
  "step": 1650
1261
+ },
1262
+ {
1263
+ "epoch": 0.18925466723671083,
1264
+ "grad_norm": 20.21547508239746,
1265
+ "learning_rate": 0.0001,
1266
+ "loss": 3.5619,
1267
+ "step": 1660
1268
+ },
1269
+ {
1270
+ "epoch": 0.1903947555935585,
1271
+ "grad_norm": 13.824969291687012,
1272
+ "learning_rate": 0.0001,
1273
+ "loss": 2.724,
1274
+ "step": 1670
1275
+ },
1276
+ {
1277
+ "epoch": 0.19153484395040615,
1278
+ "grad_norm": 19.375152587890625,
1279
+ "learning_rate": 0.0001,
1280
+ "loss": 2.62,
1281
+ "step": 1680
1282
+ },
1283
+ {
1284
+ "epoch": 0.1926749323072538,
1285
+ "grad_norm": 16.543657302856445,
1286
+ "learning_rate": 0.0001,
1287
+ "loss": 2.7389,
1288
+ "step": 1690
1289
+ },
1290
+ {
1291
+ "epoch": 0.19381502066410147,
1292
+ "grad_norm": 42.59777069091797,
1293
+ "learning_rate": 0.0001,
1294
+ "loss": 3.038,
1295
+ "step": 1700
1296
+ },
1297
+ {
1298
+ "epoch": 0.19495510902094912,
1299
+ "grad_norm": 18.97529411315918,
1300
+ "learning_rate": 0.0001,
1301
+ "loss": 3.3123,
1302
+ "step": 1710
1303
+ },
1304
+ {
1305
+ "epoch": 0.1960951973777968,
1306
+ "grad_norm": 16.993947982788086,
1307
+ "learning_rate": 0.0001,
1308
+ "loss": 2.5807,
1309
+ "step": 1720
1310
+ },
1311
+ {
1312
+ "epoch": 0.19723528573464444,
1313
+ "grad_norm": 16.720861434936523,
1314
+ "learning_rate": 0.0001,
1315
+ "loss": 2.711,
1316
+ "step": 1730
1317
+ },
1318
+ {
1319
+ "epoch": 0.19837537409149208,
1320
+ "grad_norm": 17.76796531677246,
1321
+ "learning_rate": 0.0001,
1322
+ "loss": 2.4549,
1323
+ "step": 1740
1324
+ },
1325
+ {
1326
+ "epoch": 0.19951546244833976,
1327
+ "grad_norm": 26.065580368041992,
1328
+ "learning_rate": 0.0001,
1329
+ "loss": 3.1208,
1330
+ "step": 1750
1331
+ },
1332
+ {
1333
+ "epoch": 0.2006555508051874,
1334
+ "grad_norm": 24.055755615234375,
1335
+ "learning_rate": 0.0001,
1336
+ "loss": 3.5162,
1337
+ "step": 1760
1338
+ },
1339
+ {
1340
+ "epoch": 0.20179563916203505,
1341
+ "grad_norm": 16.346284866333008,
1342
+ "learning_rate": 0.0001,
1343
+ "loss": 2.7431,
1344
+ "step": 1770
1345
+ },
1346
+ {
1347
+ "epoch": 0.20293572751888272,
1348
+ "grad_norm": 14.49986457824707,
1349
+ "learning_rate": 0.0001,
1350
+ "loss": 2.6966,
1351
+ "step": 1780
1352
+ },
1353
+ {
1354
+ "epoch": 0.20407581587573037,
1355
+ "grad_norm": 15.518335342407227,
1356
+ "learning_rate": 0.0001,
1357
+ "loss": 2.5071,
1358
+ "step": 1790
1359
+ },
1360
+ {
1361
+ "epoch": 0.20521590423257802,
1362
+ "grad_norm": 22.37300682067871,
1363
+ "learning_rate": 0.0001,
1364
+ "loss": 3.201,
1365
+ "step": 1800
1366
+ },
1367
+ {
1368
+ "epoch": 0.20521590423257802,
1369
+ "eval_loss": 0.7226201295852661,
1370
+ "eval_runtime": 297.2798,
1371
+ "eval_samples_per_second": 24.849,
1372
+ "eval_steps_per_second": 6.213,
1373
+ "step": 1800
1374
  }
1375
  ],
1376
  "logging_steps": 10,
 
1385
  "early_stopping_threshold": 0.0
1386
  },
1387
  "attributes": {
1388
+ "early_stopping_patience_counter": 2
1389
  }
1390
  },
1391
  "TrainerControl": {
 
1394
  "should_evaluate": false,
1395
  "should_log": false,
1396
  "should_save": true,
1397
+ "should_training_stop": true
1398
  },
1399
  "attributes": {}
1400
  }
1401
  },
1402
+ "total_flos": 6.328172735889408e+17,
1403
  "train_batch_size": 4,
1404
  "trial_name": null,
1405
  "trial_params": null