ToastyPigeon committed (verified)
Commit da277d7 · Parent: c71f248

Training in progress, step 207, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34183b94352583baa7858f57390db2d68a56c0619a9cc2a8877970a3e57768b5
+oid sha256:31fbb834535d30942ff8926c5ac856548f98ca3a71f1c2f7d371cb8f822d0e3c
 size 1101095848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46ab5acbc435266a885ea5fe74d03b3a12ffbfc9d8cadf03847d5eaa375b398c
+oid sha256:d315cdc1640f0b8c81f2afdc924fde254c2c741910745b9daca663b83e61a59f
 size 841204242
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d0a9e30edf486e222ce5288139d14d2038d08ba04df3919e427e10dc6814539
+oid sha256:21bf96c648c2b81637c2a374c88eb7bd6aaef1de82d55c601d0b411131031f36
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6797f75af78591cc14e3bc03bbfcedcfc3cab59ea27d5dffbfe06c2a17992e46
+oid sha256:0398efb9f2d009f44e4675efc73a4fa2f0e6d741b98fe4c59c94a120cfb58052
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8070175438596491,
+  "epoch": 0.9078947368421053,
   "eval_steps": 23,
-  "global_step": 184,
+  "global_step": 207,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1367,6 +1367,175 @@
       "eval_samples_per_second": 0.365,
       "eval_steps_per_second": 0.365,
       "step": 184
+    },
+    {
+      "epoch": 0.8114035087719298,
+      "grad_norm": 0.04292619228363037,
+      "learning_rate": 9.183446957694048e-06,
+      "loss": 2.225,
+      "step": 185
+    },
+    {
+      "epoch": 0.8157894736842105,
+      "grad_norm": 0.04302488639950752,
+      "learning_rate": 8.997043104258856e-06,
+      "loss": 2.0675,
+      "step": 186
+    },
+    {
+      "epoch": 0.8201754385964912,
+      "grad_norm": 0.04218915104866028,
+      "learning_rate": 8.814481808360945e-06,
+      "loss": 2.1778,
+      "step": 187
+    },
+    {
+      "epoch": 0.8245614035087719,
+      "grad_norm": 0.04348418116569519,
+      "learning_rate": 8.635800982982958e-06,
+      "loss": 2.2598,
+      "step": 188
+    },
+    {
+      "epoch": 0.8289473684210527,
+      "grad_norm": 0.04379533231258392,
+      "learning_rate": 8.461037735240047e-06,
+      "loss": 2.223,
+      "step": 189
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.04641556367278099,
+      "learning_rate": 8.290228358673758e-06,
+      "loss": 2.1633,
+      "step": 190
+    },
+    {
+      "epoch": 0.8377192982456141,
+      "grad_norm": 0.04623427614569664,
+      "learning_rate": 8.123408325714857e-06,
+      "loss": 2.2546,
+      "step": 191
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 0.04348750412464142,
+      "learning_rate": 7.960612280316673e-06,
+      "loss": 2.2283,
+      "step": 192
+    },
+    {
+      "epoch": 0.8464912280701754,
+      "grad_norm": 0.04299633204936981,
+      "learning_rate": 7.801874030760472e-06,
+      "loss": 2.2155,
+      "step": 193
+    },
+    {
+      "epoch": 0.8508771929824561,
+      "grad_norm": 0.04249183461070061,
+      "learning_rate": 7.647226542634454e-06,
+      "loss": 2.2647,
+      "step": 194
+    },
+    {
+      "epoch": 0.8552631578947368,
+      "grad_norm": 0.04467320442199707,
+      "learning_rate": 7.49670193198766e-06,
+      "loss": 2.3202,
+      "step": 195
+    },
+    {
+      "epoch": 0.8596491228070176,
+      "grad_norm": 0.04538441821932793,
+      "learning_rate": 7.350331458660367e-06,
+      "loss": 2.0542,
+      "step": 196
+    },
+    {
+      "epoch": 0.8640350877192983,
+      "grad_norm": 0.04282210022211075,
+      "learning_rate": 7.208145519792266e-06,
+      "loss": 2.3344,
+      "step": 197
+    },
+    {
+      "epoch": 0.868421052631579,
+      "grad_norm": 0.042627353221178055,
+      "learning_rate": 7.0701736435098155e-06,
+      "loss": 2.3739,
+      "step": 198
+    },
+    {
+      "epoch": 0.8728070175438597,
+      "grad_norm": 0.04885130748152733,
+      "learning_rate": 6.936444482794065e-06,
+      "loss": 2.2614,
+      "step": 199
+    },
+    {
+      "epoch": 0.8771929824561403,
+      "grad_norm": 0.04192091524600983,
+      "learning_rate": 6.806985809530189e-06,
+      "loss": 2.0821,
+      "step": 200
+    },
+    {
+      "epoch": 0.881578947368421,
+      "grad_norm": 0.04542316868901253,
+      "learning_rate": 6.6818245087400574e-06,
+      "loss": 2.3226,
+      "step": 201
+    },
+    {
+      "epoch": 0.8859649122807017,
+      "grad_norm": 0.0446937195956707,
+      "learning_rate": 6.56098657299893e-06,
+      "loss": 2.342,
+      "step": 202
+    },
+    {
+      "epoch": 0.8903508771929824,
+      "grad_norm": 0.04320209473371506,
+      "learning_rate": 6.444497097037532e-06,
+      "loss": 2.1945,
+      "step": 203
+    },
+    {
+      "epoch": 0.8947368421052632,
+      "grad_norm": 0.04684532806277275,
+      "learning_rate": 6.332380272530536e-06,
+      "loss": 2.2744,
+      "step": 204
+    },
+    {
+      "epoch": 0.8991228070175439,
+      "grad_norm": 0.04657423868775368,
+      "learning_rate": 6.224659383072649e-06,
+      "loss": 2.1249,
+      "step": 205
+    },
+    {
+      "epoch": 0.9035087719298246,
+      "grad_norm": 0.04765097796916962,
+      "learning_rate": 6.1213567993432085e-06,
+      "loss": 2.1456,
+      "step": 206
+    },
+    {
+      "epoch": 0.9078947368421053,
+      "grad_norm": 0.047186579555273056,
+      "learning_rate": 6.022493974460447e-06,
+      "loss": 2.2972,
+      "step": 207
+    },
+    {
+      "epoch": 0.9078947368421053,
+      "eval_loss": 2.163572311401367,
+      "eval_runtime": 218.9467,
+      "eval_samples_per_second": 0.365,
+      "eval_steps_per_second": 0.365,
+      "step": 207
     }
   ],
   "logging_steps": 1,
@@ -1386,7 +1555,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0322218694171689e+18,
+  "total_flos": 1.161249603094315e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null