antimage88 committed on
Commit
ffb163a
·
verified ·
1 Parent(s): e821c91

Training in progress, step 212, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3bf6d6a2180aaa44b0684de1c1b4c44c876e3b61e90774ecf34f9d75d0e5306
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f3ae69127fc293d24c01e08524b58c279e8c5f0ae7b6c7a1ef22f6be3e77291
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:753552200a46d3e7e4d375873d1c04cbc4b9acbcf9d4abe320d73e382fdda544
3
  size 325339796
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdcf04cb6bf4c0b0d7b9f8f1eb99e7b5124bbb4f4b8805d7916763277c22a05b
3
  size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7df43c4526bd9a8ecddc4847c60b296a89597a43586e7d1e215a1b5c2cbd258
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27cc510000b450ada0ab6c2ce09449d46c05dfdfaf1135374ca981c94e122f97
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f462ed10ef931ef3980d2aa020052a49dcadd400550d826d7ccc8a4e298b95f9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b80340678794eceb27a8ea7f91bc6361013d570f26cb607a0c954f150a1a4c00
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.1042157411575317,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 0.9456264775413712,
5
  "eval_steps": 50,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1447,6 +1447,90 @@
1447
  "eval_samples_per_second": 9.275,
1448
  "eval_steps_per_second": 2.338,
1449
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
  }
1451
  ],
1452
  "logging_steps": 1,
@@ -1470,12 +1554,12 @@
1470
  "should_evaluate": false,
1471
  "should_log": false,
1472
  "should_save": true,
1473
- "should_training_stop": false
1474
  },
1475
  "attributes": {}
1476
  }
1477
  },
1478
- "total_flos": 2.6742965062651085e+17,
1479
  "train_batch_size": 8,
1480
  "trial_name": null,
1481
  "trial_params": null
 
1
  {
2
  "best_metric": 1.1042157411575317,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 1.0023640661938533,
5
  "eval_steps": 50,
6
+ "global_step": 212,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1447
  "eval_samples_per_second": 9.275,
1448
  "eval_steps_per_second": 2.338,
1449
  "step": 200
1450
+ },
1451
+ {
1452
+ "epoch": 0.950354609929078,
1453
+ "grad_norm": 0.3739728331565857,
1454
+ "learning_rate": 7.29899235565934e-07,
1455
+ "loss": 1.1624,
1456
+ "step": 201
1457
+ },
1458
+ {
1459
+ "epoch": 0.9550827423167849,
1460
+ "grad_norm": 0.35657617449760437,
1461
+ "learning_rate": 6.034780129621664e-07,
1462
+ "loss": 1.0881,
1463
+ "step": 202
1464
+ },
1465
+ {
1466
+ "epoch": 0.9598108747044918,
1467
+ "grad_norm": 0.37702855467796326,
1468
+ "learning_rate": 4.890044990165321e-07,
1469
+ "loss": 1.1991,
1470
+ "step": 203
1471
+ },
1472
+ {
1473
+ "epoch": 0.9645390070921985,
1474
+ "grad_norm": 0.3940827250480652,
1475
+ "learning_rate": 3.8650638183617694e-07,
1476
+ "loss": 1.1217,
1477
+ "step": 204
1478
+ },
1479
+ {
1480
+ "epoch": 0.9692671394799054,
1481
+ "grad_norm": 0.3676983118057251,
1482
+ "learning_rate": 2.9600845299737056e-07,
1483
+ "loss": 1.0818,
1484
+ "step": 205
1485
+ },
1486
+ {
1487
+ "epoch": 0.9739952718676123,
1488
+ "grad_norm": 0.399383008480072,
1489
+ "learning_rate": 2.1753260154906973e-07,
1490
+ "loss": 1.0898,
1491
+ "step": 206
1492
+ },
1493
+ {
1494
+ "epoch": 0.9787234042553191,
1495
+ "grad_norm": 0.3540259599685669,
1496
+ "learning_rate": 1.5109780871853663e-07,
1497
+ "loss": 1.0337,
1498
+ "step": 207
1499
+ },
1500
+ {
1501
+ "epoch": 0.983451536643026,
1502
+ "grad_norm": 0.3893062174320221,
1503
+ "learning_rate": 9.672014332028356e-08,
1504
+ "loss": 0.9161,
1505
+ "step": 208
1506
+ },
1507
+ {
1508
+ "epoch": 0.9881796690307328,
1509
+ "grad_norm": 0.39612168073654175,
1510
+ "learning_rate": 5.4412757869459763e-08,
1511
+ "loss": 1.0683,
1512
+ "step": 209
1513
+ },
1514
+ {
1515
+ "epoch": 0.9929078014184397,
1516
+ "grad_norm": 0.443149209022522,
1517
+ "learning_rate": 2.4185885400596075e-08,
1518
+ "loss": 1.0856,
1519
+ "step": 210
1520
+ },
1521
+ {
1522
+ "epoch": 0.9976359338061466,
1523
+ "grad_norm": 0.46631062030792236,
1524
+ "learning_rate": 6.04683699252373e-09,
1525
+ "loss": 1.101,
1526
+ "step": 211
1527
+ },
1528
+ {
1529
+ "epoch": 1.0023640661938533,
1530
+ "grad_norm": 0.9580826759338379,
1531
+ "learning_rate": 0.0,
1532
+ "loss": 1.865,
1533
+ "step": 212
1534
  }
1535
  ],
1536
  "logging_steps": 1,
 
1554
  "should_evaluate": false,
1555
  "should_log": false,
1556
  "should_save": true,
1557
+ "should_training_stop": true
1558
  },
1559
  "attributes": {}
1560
  }
1561
  },
1562
+ "total_flos": 2.8356191188942848e+17,
1563
  "train_batch_size": 8,
1564
  "trial_name": null,
1565
  "trial_params": null