besimray committed
Commit f430497 · verified · 1 Parent(s): 6be2fb4

Training in progress, step 170, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ab93c62897f662d4d38ac81ba6722525a4a131aab584b861fa9fe9595eaa00d
+oid sha256:cae35da49136aca031719c5ca8f8f823b595f9a54cfcebd96d23b76072493171
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e5b4457d78966297bba2691418c3f2a52643bb3d30071b52983a78ba77d6829
+oid sha256:448ae083d060668e44056b69653fdd0049338729d1b521feb302ee22f924cb46
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f70c79d838c6ac44cd657014b2c5e6d9665e7b30e4720655ef5f14c685c34ba
+oid sha256:844207f7fc1f250f3c3227664c738804581a7c64524623dea4b56e1bb8b53b4b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac2f052986474c47aa92ee09f81c986cf5cd3c01bde827e5d887e85b2bbda4c2
+oid sha256:28dc6e77e7615b07bf838112784b57c68045402225387150827a3ab1f6905779
 size 1064
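
Each of the four binaries above is tracked with Git LFS, so the commit only changes the three-line pointer file (spec version, sha256 oid, byte size); the tensors themselves change while the recorded sizes stay constant. A minimal sketch of checking a locally downloaded checkpoint file against its pointer, assuming both files are on disk (the paths below are hypothetical):

```python
import hashlib
from pathlib import Path


def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse a git-lfs v1 pointer file into its version, oid, and size fields."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        if not line.strip():
            continue
        key, _, value = line.partition(" ")
        fields[key] = value
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }


def verify_lfs_object(pointer_path: str, blob_path: str) -> bool:
    """Return True if the real file matches the oid and size recorded in the pointer."""
    pointer = parse_lfs_pointer(pointer_path)
    data = Path(blob_path).read_bytes()
    return (
        len(data) == pointer["size"]
        and hashlib.sha256(data).hexdigest() == pointer["oid"]
    )


# Hypothetical paths: the pointer as stored in git vs. the resolved LFS blob.
# print(verify_lfs_object("adapter_model.safetensors.pointer",
#                         "last-checkpoint/adapter_model.safetensors"))
```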
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.024553298950195,
-  "best_model_checkpoint": "miner_id_24/checkpoint-160",
-  "epoch": 0.007231474994915369,
+  "best_metric": 11.023889541625977,
+  "best_model_checkpoint": "miner_id_24/checkpoint-170",
+  "epoch": 0.00768344218209758,
   "eval_steps": 5,
-  "global_step": 160,
+  "global_step": 170,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1391,6 +1391,92 @@
       "eval_samples_per_second": 52.873,
       "eval_steps_per_second": 26.439,
       "step": 160
+    },
+    {
+      "epoch": 0.00727667171363359,
+      "grad_norm": 0.5078541040420532,
+      "learning_rate": 0.000156684647388045,
+      "loss": 44.0764,
+      "step": 161
+    },
+    {
+      "epoch": 0.007321868432351811,
+      "grad_norm": 0.46269139647483826,
+      "learning_rate": 0.0001561552986800375,
+      "loss": 44.0991,
+      "step": 162
+    },
+    {
+      "epoch": 0.007367065151070032,
+      "grad_norm": 0.5498519539833069,
+      "learning_rate": 0.0001556236416476465,
+      "loss": 44.1389,
+      "step": 163
+    },
+    {
+      "epoch": 0.007412261869788253,
+      "grad_norm": 0.8603391647338867,
+      "learning_rate": 0.00015508969814521025,
+      "loss": 44.1567,
+      "step": 164
+    },
+    {
+      "epoch": 0.007457458588506475,
+      "grad_norm": 0.6750001907348633,
+      "learning_rate": 0.00015455349012105486,
+      "loss": 44.1007,
+      "step": 165
+    },
+    {
+      "epoch": 0.007457458588506475,
+      "eval_loss": 11.024243354797363,
+      "eval_runtime": 175.9806,
+      "eval_samples_per_second": 52.943,
+      "eval_steps_per_second": 26.475,
+      "step": 165
+    },
+    {
+      "epoch": 0.007502655307224696,
+      "grad_norm": 0.5474929809570312,
+      "learning_rate": 0.00015401503961659204,
+      "loss": 44.0842,
+      "step": 166
+    },
+    {
+      "epoch": 0.007547852025942917,
+      "grad_norm": 0.5558362603187561,
+      "learning_rate": 0.00015347436876541297,
+      "loss": 44.1025,
+      "step": 167
+    },
+    {
+      "epoch": 0.007593048744661137,
+      "grad_norm": 0.5435320138931274,
+      "learning_rate": 0.00015293149979237876,
+      "loss": 44.073,
+      "step": 168
+    },
+    {
+      "epoch": 0.007638245463379358,
+      "grad_norm": 0.41495761275291443,
+      "learning_rate": 0.00015238645501270654,
+      "loss": 44.0608,
+      "step": 169
+    },
+    {
+      "epoch": 0.00768344218209758,
+      "grad_norm": 0.4491158127784729,
+      "learning_rate": 0.00015183925683105254,
+      "loss": 44.0995,
+      "step": 170
+    },
+    {
+      "epoch": 0.00768344218209758,
+      "eval_loss": 11.023889541625977,
+      "eval_runtime": 176.2494,
+      "eval_samples_per_second": 52.863,
+      "eval_steps_per_second": 26.434,
+      "step": 170
     }
   ],
   "logging_steps": 1,
@@ -1419,7 +1505,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1681077043200.0,
+  "total_flos": 1786144358400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null