dq158 committed
Commit a29b52c · 1 Parent(s): c9bd16a

Training in progress, epoch 5, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:96a7404b8a9bf4f3429340685e7b73eeabd93d59ae6548dc2c60b1db6f076d93
+ oid sha256:b127cfb645d1c75f132a876553609b5374befd9def67d558e5974fa97067d21c
  size 37789864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:330d9e1204813ac3853aa721f905d8c6b2cb7ac5600d6e2c7c1e574f2fdb63b6
+ oid sha256:54d9a221b97b4a103c3bbd0fd8abf8231538258d0486fa04d26296da0b8c69c4
  size 2622266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3e7f9a5c7d8d5bfdb0fe5d750f5f6aec4fd34af75f1125908370bc0f6572779a
+ oid sha256:9c9b188ef411c860fa162f3665ea4a77b4e06ff07ab727a5f17c10e78d9237ed
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1e3ba21b67351b0add0fbc5afde8275e09da00755e1bac11b87c285f4525c581
+ oid sha256:29fd4cd191139dd97c05600899e5a66a54004c5898470ac779c283abb76b7689
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 2.8476955890655518,
- "best_model_checkpoint": "dq158/pingusPongus/checkpoint-102044",
- "epoch": 4.0,
+ "best_metric": 2.8255138397216797,
+ "best_model_checkpoint": "dq158/pingusPongus/checkpoint-127555",
+ "epoch": 5.0,
  "eval_steps": 500,
- "global_step": 102044,
+ "global_step": 127555,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1307,13 +1307,338 @@
  "eval_steps_per_second": 0.291,
  "eval_translation_length": 5805056,
  "step": 102044
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 7.65403033800726e-05,
+ "loss": 2.9547,
+ "step": 102500
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 7.6506805368716e-05,
+ "loss": 2.8596,
+ "step": 103000
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 7.647315337052083e-05,
+ "loss": 2.9155,
+ "step": 103500
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 7.643934752743228e-05,
+ "loss": 2.8802,
+ "step": 104000
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 7.640538798204443e-05,
+ "loss": 2.9248,
+ "step": 104500
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 7.63712748775997e-05,
+ "loss": 2.8865,
+ "step": 105000
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 7.633700835798824e-05,
+ "loss": 2.9383,
+ "step": 105500
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 7.630258856774726e-05,
+ "loss": 2.8581,
+ "step": 106000
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 7.626801565206054e-05,
+ "loss": 2.9359,
+ "step": 106500
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 7.623328975675768e-05,
+ "loss": 2.9459,
+ "step": 107000
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 7.619841102831362e-05,
+ "loss": 2.9888,
+ "step": 107500
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 7.616337961384787e-05,
+ "loss": 2.8815,
+ "step": 108000
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 7.612819566112408e-05,
+ "loss": 2.9105,
+ "step": 108500
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 7.609285931854922e-05,
+ "loss": 2.9028,
+ "step": 109000
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 7.605737073517308e-05,
+ "loss": 2.9141,
+ "step": 109500
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 7.602173006068763e-05,
+ "loss": 2.9244,
+ "step": 110000
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 7.598593744542632e-05,
+ "loss": 2.8534,
+ "step": 110500
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 7.594999304036352e-05,
+ "loss": 2.8936,
+ "step": 111000
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 7.591389699711384e-05,
+ "loss": 2.8866,
+ "step": 111500
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 7.587764946793153e-05,
+ "loss": 2.9335,
+ "step": 112000
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 7.584125060570976e-05,
+ "loss": 2.7982,
+ "step": 112500
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 7.58047005639801e-05,
+ "loss": 2.8565,
+ "step": 113000
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 7.576799949691174e-05,
+ "loss": 2.8682,
+ "step": 113500
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 7.573114755931093e-05,
+ "loss": 2.887,
+ "step": 114000
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 7.569414490662027e-05,
+ "loss": 2.9001,
+ "step": 114500
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 7.565699169491811e-05,
+ "loss": 2.8804,
+ "step": 115000
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 7.561968808091784e-05,
+ "loss": 2.9395,
+ "step": 115500
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 7.558223422196728e-05,
+ "loss": 2.922,
+ "step": 116000
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 7.554463027604798e-05,
+ "loss": 2.8777,
+ "step": 116500
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 7.550687640177451e-05,
+ "loss": 2.8746,
+ "step": 117000
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 7.54689727583939e-05,
+ "loss": 2.9362,
+ "step": 117500
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 7.543091950578492e-05,
+ "loss": 2.8282,
+ "step": 118000
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 7.539271680445734e-05,
+ "loss": 2.8902,
+ "step": 118500
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 7.535436481555135e-05,
+ "loss": 2.9123,
+ "step": 119000
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 7.531586370083684e-05,
+ "loss": 2.847,
+ "step": 119500
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 7.527721362271269e-05,
+ "loss": 2.8664,
+ "step": 120000
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 7.523841474420614e-05,
+ "loss": 2.9341,
+ "step": 120500
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 7.519946722897209e-05,
+ "loss": 2.8942,
+ "step": 121000
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 7.516037124129231e-05,
+ "loss": 2.9092,
+ "step": 121500
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 7.512112694607494e-05,
+ "loss": 2.8891,
+ "step": 122000
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 7.508173450885361e-05,
+ "loss": 2.9581,
+ "step": 122500
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 7.504219409578685e-05,
+ "loss": 3.0015,
+ "step": 123000
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 7.500250587365735e-05,
+ "loss": 2.9472,
+ "step": 123500
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 7.496267000987127e-05,
+ "loss": 2.8975,
+ "step": 124000
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 7.49226866724575e-05,
+ "loss": 2.8226,
+ "step": 124500
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 7.488255603006703e-05,
+ "loss": 2.8738,
+ "step": 125000
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 7.484227825197214e-05,
+ "loss": 2.9323,
+ "step": 125500
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 7.480185350806574e-05,
+ "loss": 2.9436,
+ "step": 126000
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 7.476128196886067e-05,
+ "loss": 2.8861,
+ "step": 126500
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 7.472056380548893e-05,
+ "loss": 2.9255,
+ "step": 127000
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 7.467969918970099e-05,
+ "loss": 2.8591,
+ "step": 127500
+ },
+ {
+ "epoch": 5.0,
+ "eval_bleu": 1.0,
+ "eval_brevity_penalty": 1.0,
+ "eval_length_ratio": 1.0,
+ "eval_loss": 2.8255138397216797,
+ "eval_precisions": [
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ "eval_reference_length": 5805056,
+ "eval_runtime": 10189.5043,
+ "eval_samples_per_second": 1.113,
+ "eval_steps_per_second": 0.278,
+ "eval_translation_length": 5805056,
+ "step": 127555
  }
  ],
  "logging_steps": 500,
  "max_steps": 765330,
  "num_train_epochs": 30,
  "save_steps": 1000,
- "total_flos": 3.502614878298833e+18,
+ "total_flos": 4.378268597873541e+18,
  "trial_name": null,
  "trial_params": null
  }