farmery commited on
Commit
94d711c
·
verified ·
1 Parent(s): 81bbd4e

Training in progress, step 209, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6edb0539f97709cea271de72099d52c94bb0597166f3f4aa5b683b9d3a40338
3
  size 50503544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb3c042fc05446cce42e8ba05c26d21f6a33fad1d276065c31751e9eb9b92c9a
3
  size 50503544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2499bbfa4e07b794c1813ff851011b0c63cdce04787dbd7aabaeb49ee1484df7
3
  size 101184122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9161dc0d5b45d102a25e69107d79b474ed640e72b157f48abc64aaaed3deb54
3
  size 101184122
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e67e3986f9b5d21cf143e8fff112d872e8f773d9018b5fb314111df7f4592c35
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32f074aa0dc3bbbaf32ed9b3d0e52c2afafd75640cf1eb31d812ea245e926858
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b1b8e48d9009abc474691522115451b9e2ab07413f2618da36c748a460ecac4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40297b54e197ed589332b6920de875b05d1178917ecfa4de495baf5faadb1bb0
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59fda8ef542f982c7a174ffae9f10913c30cef756ee4217304ce41dd5c496953
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84f1f6e7cbd40691344b0f55583c6580c77c289b6e22a00d28c5c1f7ecb12ce0
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8a9db533c3e04750882b6ad94d0935c91d30807f5a987a39b39bc075beb2051
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c49b3899e9febf007ca0e81d807f4d5855313b577082d469902339f58ac46d
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf6c7c2e48585167b8ae09e4fab626fc8f2a4ca56cc9826daed29b7acb9cc601
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c0a96fa3a122e63ba1eeeef721578b9cf44b040fe3f79eba83b9517ca4ac4c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.539456307888031,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 2.8793103448275863,
5
  "eval_steps": 25,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1479,6 +1479,69 @@
1479
  "eval_samples_per_second": 57.511,
1480
  "eval_steps_per_second": 3.451,
1481
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1482
  }
1483
  ],
1484
  "logging_steps": 1,
@@ -1502,12 +1565,12 @@
1502
  "should_evaluate": false,
1503
  "should_log": false,
1504
  "should_save": true,
1505
- "should_training_stop": false
1506
  },
1507
  "attributes": {}
1508
  }
1509
  },
1510
- "total_flos": 3.6169443950618214e+17,
1511
  "train_batch_size": 6,
1512
  "trial_name": null,
1513
  "trial_params": null
 
1
  {
2
  "best_metric": 0.539456307888031,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 3.0114942528735633,
5
  "eval_steps": 25,
6
+ "global_step": 209,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1479
  "eval_samples_per_second": 57.511,
1480
  "eval_steps_per_second": 3.451,
1481
  "step": 200
1482
+ },
1483
+ {
1484
+ "epoch": 2.8936781609195403,
1485
+ "grad_norm": 0.3132757842540741,
1486
+ "learning_rate": 2.0702642165868325e-05,
1487
+ "loss": 0.3531,
1488
+ "step": 201
1489
+ },
1490
+ {
1491
+ "epoch": 2.9080459770114944,
1492
+ "grad_norm": 0.49438345432281494,
1493
+ "learning_rate": 2.053812474481016e-05,
1494
+ "loss": 1.0436,
1495
+ "step": 202
1496
+ },
1497
+ {
1498
+ "epoch": 2.9224137931034484,
1499
+ "grad_norm": 0.2360573410987854,
1500
+ "learning_rate": 2.0395461618029175e-05,
1501
+ "loss": 0.5821,
1502
+ "step": 203
1503
+ },
1504
+ {
1505
+ "epoch": 2.9367816091954024,
1506
+ "grad_norm": 0.19015099108219147,
1507
+ "learning_rate": 2.0274687636146892e-05,
1508
+ "loss": 0.4832,
1509
+ "step": 204
1510
+ },
1511
+ {
1512
+ "epoch": 2.9511494252873565,
1513
+ "grad_norm": 0.24269899725914001,
1514
+ "learning_rate": 2.0175832302570575e-05,
1515
+ "loss": 0.5398,
1516
+ "step": 205
1517
+ },
1518
+ {
1519
+ "epoch": 2.9655172413793105,
1520
+ "grad_norm": 0.25860899686813354,
1521
+ "learning_rate": 2.0098919766285978e-05,
1522
+ "loss": 0.5282,
1523
+ "step": 206
1524
+ },
1525
+ {
1526
+ "epoch": 2.9798850574712645,
1527
+ "grad_norm": 0.2763611674308777,
1528
+ "learning_rate": 2.0043968815958072e-05,
1529
+ "loss": 0.5299,
1530
+ "step": 207
1531
+ },
1532
+ {
1533
+ "epoch": 2.9942528735632186,
1534
+ "grad_norm": 0.6282367706298828,
1535
+ "learning_rate": 2.0010992875341232e-05,
1536
+ "loss": 0.63,
1537
+ "step": 208
1538
+ },
1539
+ {
1540
+ "epoch": 3.0114942528735633,
1541
+ "grad_norm": 0.7394906282424927,
1542
+ "learning_rate": 2e-05,
1543
+ "loss": 1.2121,
1544
+ "step": 209
1545
  }
1546
  ],
1547
  "logging_steps": 1,
 
1565
  "should_evaluate": false,
1566
  "should_log": false,
1567
  "should_save": true,
1568
+ "should_training_stop": true
1569
  },
1570
  "attributes": {}
1571
  }
1572
  },
1573
+ "total_flos": 3.7867902377499034e+17,
1574
  "train_batch_size": 6,
1575
  "trial_name": null,
1576
  "trial_params": null