neuralwonderland commited on
Commit
977944d
·
verified ·
1 Parent(s): 6e3387c

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e409cb8fa744b254b1a8c345ea629dc3b28f081324279756fdc5c325a01c5feb
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c9d6fa4bd4a7f811d406dc78d71b6277f7a33c87b8dbb1bfc5e8b11944025e3
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88ee48e40e6035d9aba926a6f7808928c1169d339755ea3ab6a85192b547dd45
3
  size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b109b4bcd5ed5713f06eb80d62c0cf92f198120ce7084a71f7374bbebfdbbe7
3
  size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3815ae16dc40e74efcece45e896c9f2b9fa451866c099ae3e85989a7d6af3d1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94c6399f1cb9c475b90c0c57344eb7d7504723cecd3f0cbfb01bf8e52584025
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f9ae45a4a87e9c622f6c946115882b07eb3df1355feb3bb786f6ae49ca483d1
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ad5f767960ba7d6ae74d9562642b460717a348f4fe2a9ad2bcc901ddcae8168
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.2077045440673828,
3
  "best_model_checkpoint": "./output/checkpoint-1950",
4
- "epoch": 0.08734602463605823,
5
  "eval_steps": 150,
6
- "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1476,6 +1476,119 @@
1476
  "eval_samples_per_second": 9.666,
1477
  "eval_steps_per_second": 9.666,
1478
  "step": 1950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1479
  }
1480
  ],
1481
  "logging_steps": 10,
@@ -1495,7 +1608,7 @@
1495
  "attributes": {}
1496
  }
1497
  },
1498
- "total_flos": 2.518965478232064e+17,
1499
  "train_batch_size": 4,
1500
  "trial_name": null,
1501
  "trial_params": null
 
1
  {
2
  "best_metric": 1.2077045440673828,
3
  "best_model_checkpoint": "./output/checkpoint-1950",
4
+ "epoch": 0.0940649496080627,
5
  "eval_steps": 150,
6
+ "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1476
  "eval_samples_per_second": 9.666,
1477
  "eval_steps_per_second": 9.666,
1478
  "step": 1950
1479
+ },
1480
+ {
1481
+ "epoch": 0.0877939529675252,
1482
+ "grad_norm": 6.297740459442139,
1483
+ "learning_rate": 5.134936822616837e-06,
1484
+ "loss": 1.1664,
1485
+ "step": 1960
1486
+ },
1487
+ {
1488
+ "epoch": 0.08824188129899216,
1489
+ "grad_norm": 5.478749752044678,
1490
+ "learning_rate": 5.112565447607669e-06,
1491
+ "loss": 1.2503,
1492
+ "step": 1970
1493
+ },
1494
+ {
1495
+ "epoch": 0.08868980963045912,
1496
+ "grad_norm": 4.692316055297852,
1497
+ "learning_rate": 5.090138062869755e-06,
1498
+ "loss": 1.1421,
1499
+ "step": 1980
1500
+ },
1501
+ {
1502
+ "epoch": 0.08913773796192609,
1503
+ "grad_norm": 3.5623536109924316,
1504
+ "learning_rate": 5.067655590305036e-06,
1505
+ "loss": 1.1203,
1506
+ "step": 1990
1507
+ },
1508
+ {
1509
+ "epoch": 0.08958566629339305,
1510
+ "grad_norm": 6.875621318817139,
1511
+ "learning_rate": 5.045118954079904e-06,
1512
+ "loss": 1.1348,
1513
+ "step": 2000
1514
+ },
1515
+ {
1516
+ "epoch": 0.09003359462486002,
1517
+ "grad_norm": 5.2604756355285645,
1518
+ "learning_rate": 5.022529080587205e-06,
1519
+ "loss": 1.0326,
1520
+ "step": 2010
1521
+ },
1522
+ {
1523
+ "epoch": 0.09048152295632698,
1524
+ "grad_norm": 5.012307643890381,
1525
+ "learning_rate": 4.999886898408157e-06,
1526
+ "loss": 1.12,
1527
+ "step": 2020
1528
+ },
1529
+ {
1530
+ "epoch": 0.09092945128779395,
1531
+ "grad_norm": 5.246688365936279,
1532
+ "learning_rate": 4.977193338274189e-06,
1533
+ "loss": 1.1164,
1534
+ "step": 2030
1535
+ },
1536
+ {
1537
+ "epoch": 0.09137737961926092,
1538
+ "grad_norm": 3.9779398441314697,
1539
+ "learning_rate": 4.954449333028672e-06,
1540
+ "loss": 1.0607,
1541
+ "step": 2040
1542
+ },
1543
+ {
1544
+ "epoch": 0.09182530795072788,
1545
+ "grad_norm": 5.392056465148926,
1546
+ "learning_rate": 4.931655817588579e-06,
1547
+ "loss": 1.1102,
1548
+ "step": 2050
1549
+ },
1550
+ {
1551
+ "epoch": 0.09227323628219485,
1552
+ "grad_norm": 5.144470691680908,
1553
+ "learning_rate": 4.9088137289060535e-06,
1554
+ "loss": 1.0649,
1555
+ "step": 2060
1556
+ },
1557
+ {
1558
+ "epoch": 0.09272116461366181,
1559
+ "grad_norm": 3.7060792446136475,
1560
+ "learning_rate": 4.885924005929896e-06,
1561
+ "loss": 1.0718,
1562
+ "step": 2070
1563
+ },
1564
+ {
1565
+ "epoch": 0.09316909294512878,
1566
+ "grad_norm": 3.357794761657715,
1567
+ "learning_rate": 4.862987589566965e-06,
1568
+ "loss": 1.1003,
1569
+ "step": 2080
1570
+ },
1571
+ {
1572
+ "epoch": 0.09361702127659574,
1573
+ "grad_norm": 5.704718589782715,
1574
+ "learning_rate": 4.840005422643503e-06,
1575
+ "loss": 1.2042,
1576
+ "step": 2090
1577
+ },
1578
+ {
1579
+ "epoch": 0.0940649496080627,
1580
+ "grad_norm": 5.481514930725098,
1581
+ "learning_rate": 4.816978449866372e-06,
1582
+ "loss": 1.0777,
1583
+ "step": 2100
1584
+ },
1585
+ {
1586
+ "epoch": 0.0940649496080627,
1587
+ "eval_loss": 1.2093305587768555,
1588
+ "eval_runtime": 51.7975,
1589
+ "eval_samples_per_second": 9.653,
1590
+ "eval_steps_per_second": 9.653,
1591
+ "step": 2100
1592
  }
1593
  ],
1594
  "logging_steps": 10,
 
1608
  "attributes": {}
1609
  }
1610
  },
1611
+ "total_flos": 2.72182600608768e+17,
1612
  "train_batch_size": 4,
1613
  "trial_name": null,
1614
  "trial_params": null