besimray committed on
Commit 6565831 · verified · 1 Parent(s): 8b2cd86

Training in progress, step 180, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cae35da49136aca031719c5ca8f8f823b595f9a54cfcebd96d23b76072493171
+ oid sha256:30c8aadcc25f22b5bd1fff3362f07043a38073d83188469ddb5bce8d545b884f
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:448ae083d060668e44056b69653fdd0049338729d1b521feb302ee22f924cb46
+ oid sha256:bc3ccfb8c5981089a4b8c855ecc6afb5559dd1e01e57ce3254eff726ed1e7efb
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:844207f7fc1f250f3c3227664c738804581a7c64524623dea4b56e1bb8b53b4b
+ oid sha256:db28645e3a5ed38f3c725d595cee3c53b367101eca9d631abd8e1db85596d3f2
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:28dc6e77e7615b07bf838112784b57c68045402225387150827a3ab1f6905779
+ oid sha256:05495c329a0a59adfbdcbe310642bb4a2adcb593713c0b96973034e9930bed7e
  size 1064
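
The four files above are stored through Git LFS, so the commit only rewrites their pointer files: the sha256 object id changes while the recorded size stays the same. A minimal sketch (hypothetical local paths, not part of this repo) of how a downloaded object could be checked against such a pointer:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, object_path: str) -> bool:
    """Check that a downloaded file matches the oid/size recorded in its Git LFS pointer."""
    # Parse "key value" lines: version, oid sha256:<hex>, size <bytes>.
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        if " " in line:
            key, value = line.split(" ", 1)
            fields[key] = value

    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    data = Path(object_path).read_bytes()
    actual_oid = hashlib.sha256(data).hexdigest()

    return actual_oid == expected_oid and len(data) == expected_size

# Hypothetical usage:
# verify_lfs_pointer("adapter_model.safetensors.pointer", "adapter_model.safetensors")
```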
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 11.023889541625977,
- "best_model_checkpoint": "miner_id_24/checkpoint-170",
- "epoch": 0.00768344218209758,
+ "best_metric": 11.023147583007812,
+ "best_model_checkpoint": "miner_id_24/checkpoint-180",
+ "epoch": 0.00813540936927979,
  "eval_steps": 5,
- "global_step": 170,
+ "global_step": 180,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1477,6 +1477,92 @@
  "eval_samples_per_second": 52.863,
  "eval_steps_per_second": 26.434,
  "step": 170
+ },
+ {
+ "epoch": 0.007728638900815801,
+ "grad_norm": 0.6309311389923096,
+ "learning_rate": 0.00015128992774059063,
+ "loss": 44.1244,
+ "step": 171
+ },
+ {
+ "epoch": 0.007773835619534022,
+ "grad_norm": 0.4494941830635071,
+ "learning_rate": 0.00015073849032208822,
+ "loss": 44.1336,
+ "step": 172
+ },
+ {
+ "epoch": 0.007819032338252242,
+ "grad_norm": 0.5996090173721313,
+ "learning_rate": 0.00015018496724297778,
+ "loss": 44.1116,
+ "step": 173
+ },
+ {
+ "epoch": 0.007864229056970463,
+ "grad_norm": 0.73329097032547,
+ "learning_rate": 0.00014962938125642503,
+ "loss": 44.1541,
+ "step": 174
+ },
+ {
+ "epoch": 0.007909425775688685,
+ "grad_norm": 0.5808178186416626,
+ "learning_rate": 0.0001490717552003938,
+ "loss": 44.114,
+ "step": 175
+ },
+ {
+ "epoch": 0.007909425775688685,
+ "eval_loss": 11.023494720458984,
+ "eval_runtime": 175.9386,
+ "eval_samples_per_second": 52.956,
+ "eval_steps_per_second": 26.481,
+ "step": 175
+ },
+ {
+ "epoch": 0.007954622494406906,
+ "grad_norm": 0.46136102080345154,
+ "learning_rate": 0.00014851211199670721,
+ "loss": 44.0922,
+ "step": 176
+ },
+ {
+ "epoch": 0.007999819213125127,
+ "grad_norm": 0.4197680354118347,
+ "learning_rate": 0.0001479504746501054,
+ "loss": 44.0494,
+ "step": 177
+ },
+ {
+ "epoch": 0.008045015931843348,
+ "grad_norm": 0.4883246421813965,
+ "learning_rate": 0.00014738686624729986,
+ "loss": 44.0914,
+ "step": 178
+ },
+ {
+ "epoch": 0.00809021265056157,
+ "grad_norm": 0.4930349588394165,
+ "learning_rate": 0.0001468213099560246,
+ "loss": 44.0695,
+ "step": 179
+ },
+ {
+ "epoch": 0.00813540936927979,
+ "grad_norm": 0.5016703009605408,
+ "learning_rate": 0.00014625382902408356,
+ "loss": 44.0501,
+ "step": 180
+ },
+ {
+ "epoch": 0.00813540936927979,
+ "eval_loss": 11.023147583007812,
+ "eval_runtime": 176.3497,
+ "eval_samples_per_second": 52.833,
+ "eval_steps_per_second": 26.419,
+ "step": 180
  }
  ],
  "logging_steps": 1,
@@ -1505,7 +1591,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1786144358400.0,
+ "total_flos": 1891211673600.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null