Romain-XV committed
Commit bb5782b · verified · 1 Parent(s): c7b8fd9

Training in progress, step 214, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:106b74ea3601fd9707f4d23f588ca0b3da2860e706b91a94a2fa0c18d22ec0b3
+ oid sha256:e2f38ed21bc8c5c9fecd0f8ff925f2317d77fe97af022f9c059a7840e2a2dc67
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d66ab68c4fa014f4da887dd89326cf6f0650184d99f346b39310d8011d695983
+ oid sha256:bb0c2c2e3f896879971c737feec6155a8ba78caf1364c2051011a8edce89aa22
  size 37965300
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:21fddc574ae167cca5bc69fb1c46e2adfba9c0da8f31f308912edc914cb9882e
+ oid sha256:5f4cc819653ed1f55ce643a6c9060864848c25c052d08d3678103e838defc2ad
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:81ebd5cd18dfce87b4c13a120f63e848ddd310a8ad58a4ea4a53a8fb3982cdad
+ oid sha256:802f115045aeeb27907b881b744440b538c2582f39dcf8d05da0ddabad9a975b
  size 1064
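
The four files above are Git LFS pointers, so the diff only records each object's new SHA-256 OID and byte size, not the binary contents. As a minimal sketch (not part of this repo), a downloaded copy could be checked against the pointer values in this commit, assuming it sits at the local path below:

```python
import hashlib
import os

# Assumed local copy of the LFS object referenced by the new pointer.
local_path = "last-checkpoint/adapter_model.safetensors"

# Expected values taken from the new pointer lines in this commit.
expected_oid = "e2f38ed21bc8c5c9fecd0f8ff925f2317d77fe97af022f9c059a7840e2a2dc67"
expected_size = 73911112

# Hash in 1 MiB chunks so a large checkpoint never has to fit in memory.
sha = hashlib.sha256()
with open(local_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(local_path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches local file")
```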
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 1.6292288303375244,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
- "epoch": 0.9305030532131433,
+ "epoch": 0.9956382669380633,
  "eval_steps": 50,
- "global_step": 200,
+ "global_step": 214,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1447,6 +1447,104 @@
  "eval_samples_per_second": 34.61,
  "eval_steps_per_second": 8.652,
  "step": 200
+ },
+ {
+ "epoch": 0.9351555684792091,
+ "grad_norm": 1.598107933998108,
+ "learning_rate": 1.9973082526568154e-06,
+ "loss": 1.4567,
+ "step": 201
+ },
+ {
+ "epoch": 0.9398080837452748,
+ "grad_norm": 2.187147855758667,
+ "learning_rate": 1.7026900316098215e-06,
+ "loss": 1.6859,
+ "step": 202
+ },
+ {
+ "epoch": 0.9444605990113405,
+ "grad_norm": 1.804359793663025,
+ "learning_rate": 1.4313834308486097e-06,
+ "loss": 1.614,
+ "step": 203
+ },
+ {
+ "epoch": 0.9491131142774062,
+ "grad_norm": 1.7503759860992432,
+ "learning_rate": 1.1834527918740623e-06,
+ "loss": 1.68,
+ "step": 204
+ },
+ {
+ "epoch": 0.953765629543472,
+ "grad_norm": 2.0529308319091797,
+ "learning_rate": 9.589569124794916e-07,
+ "loss": 1.7563,
+ "step": 205
+ },
+ {
+ "epoch": 0.9584181448095377,
+ "grad_norm": 1.7820945978164673,
+ "learning_rate": 7.579490328064265e-07,
+ "loss": 1.516,
+ "step": 206
+ },
+ {
+ "epoch": 0.9630706600756034,
+ "grad_norm": 1.8575091361999512,
+ "learning_rate": 5.804768227185565e-07,
+ "loss": 1.6248,
+ "step": 207
+ },
+ {
+ "epoch": 0.9677231753416691,
+ "grad_norm": 1.8180886507034302,
+ "learning_rate": 4.2658237049655323e-07,
+ "loss": 1.6101,
+ "step": 208
+ },
+ {
+ "epoch": 0.9723756906077348,
+ "grad_norm": 1.6702853441238403,
+ "learning_rate": 2.963021728567106e-07,
+ "loss": 1.5597,
+ "step": 209
+ },
+ {
+ "epoch": 0.9770282058738006,
+ "grad_norm": 1.678638219833374,
+ "learning_rate": 1.8966712629558957e-07,
+ "loss": 1.5329,
+ "step": 210
+ },
+ {
+ "epoch": 0.9816807211398663,
+ "grad_norm": 1.6849240064620972,
+ "learning_rate": 1.0670251976275803e-07,
+ "loss": 1.5622,
+ "step": 211
+ },
+ {
+ "epoch": 0.986333236405932,
+ "grad_norm": 1.7889765501022339,
+ "learning_rate": 4.74280286634099e-08,
+ "loss": 1.4413,
+ "step": 212
+ },
+ {
+ "epoch": 0.9909857516719977,
+ "grad_norm": 1.7154433727264404,
+ "learning_rate": 1.1857710192308969e-08,
+ "loss": 1.5593,
+ "step": 213
+ },
+ {
+ "epoch": 0.9956382669380633,
+ "grad_norm": 1.744558572769165,
+ "learning_rate": 0.0,
+ "loss": 1.5847,
+ "step": 214
  }
  ],
  "logging_steps": 1,
@@ -1470,12 +1568,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.045015069458432e+17,
+ "total_flos": 1.1181661243205222e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null