neuralwonderland committed on
Commit
a6c10ce
·
verified ·
1 Parent(s): 640db51

Training in progress, step 1950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36fe7e658428f711dcac3d3150a53f472cf7a88f298f10e1ccb4006a342cf7ee
3
  size 69527352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e3b8ef376785e9cafb621fb5a7157d2bb4e3e92e06716e4f79376be8ddd8a67
3
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:728e85f498d27e20d6bb5a67c2c8b3daa26aa2ad070e1317c483548a70ea1b09
3
  size 139313554
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb707caa92720c742f6b5057c64ce456f97a0a305c683597e96d6c49c325b1bc
3
  size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e98115984b9346c14fe15b1988bbea4d82315ae453852800d3c136c2f59ac9ea
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fbad829d483b339a26e87847aafc349f9b5f5fa2f69baeccc578f19a8c8e5cf
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79d3369553c9b3ca0b9714ec489c1f0dddd900e219ab7f3d72b5e2b926fc5857
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dab5dfc5f8a218f5b5332643f6da5beb50a0e7236abc5407e39e2467ad257ef9
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8798418045043945,
3
- "best_model_checkpoint": "./output/checkpoint-1800",
4
- "epoch": 0.22376926902038788,
5
  "eval_steps": 150,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1363,6 +1363,119 @@
1363
  "eval_samples_per_second": 8.902,
1364
  "eval_steps_per_second": 8.902,
1365
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1366
  }
1367
  ],
1368
  "logging_steps": 10,
@@ -1382,7 +1495,7 @@
1382
  "attributes": {}
1383
  }
1384
  },
1385
- "total_flos": 6.243483263622144e+16,
1386
  "train_batch_size": 16,
1387
  "trial_name": null,
1388
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8656662106513977,
3
+ "best_model_checkpoint": "./output/checkpoint-1950",
4
+ "epoch": 0.24241670810542018,
5
  "eval_steps": 150,
6
+ "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1363
  "eval_samples_per_second": 8.902,
1364
  "eval_steps_per_second": 8.902,
1365
  "step": 1800
1366
+ },
1367
+ {
1368
+ "epoch": 0.2250124316260567,
1369
+ "grad_norm": 1.6383726596832275,
1370
+ "learning_rate": 9.105277923649698e-05,
1371
+ "loss": 0.6029,
1372
+ "step": 1810
1373
+ },
1374
+ {
1375
+ "epoch": 0.2262555942317255,
1376
+ "grad_norm": 0.863681435585022,
1377
+ "learning_rate": 9.06957416400209e-05,
1378
+ "loss": 0.6647,
1379
+ "step": 1820
1380
+ },
1381
+ {
1382
+ "epoch": 0.22749875683739434,
1383
+ "grad_norm": 1.0826516151428223,
1384
+ "learning_rate": 9.03375450270412e-05,
1385
+ "loss": 0.6775,
1386
+ "step": 1830
1387
+ },
1388
+ {
1389
+ "epoch": 0.22874191944306316,
1390
+ "grad_norm": 1.2367980480194092,
1391
+ "learning_rate": 8.997820412161764e-05,
1392
+ "loss": 0.7778,
1393
+ "step": 1840
1394
+ },
1395
+ {
1396
+ "epoch": 0.22998508204873197,
1397
+ "grad_norm": 1.4348937273025513,
1398
+ "learning_rate": 8.961773369484738e-05,
1399
+ "loss": 0.6699,
1400
+ "step": 1850
1401
+ },
1402
+ {
1403
+ "epoch": 0.23122824465440078,
1404
+ "grad_norm": 0.9706162810325623,
1405
+ "learning_rate": 8.925614856425786e-05,
1406
+ "loss": 0.684,
1407
+ "step": 1860
1408
+ },
1409
+ {
1410
+ "epoch": 0.23247140726006962,
1411
+ "grad_norm": 1.4127984046936035,
1412
+ "learning_rate": 8.88934635931975e-05,
1413
+ "loss": 0.6667,
1414
+ "step": 1870
1415
+ },
1416
+ {
1417
+ "epoch": 0.23371456986573844,
1418
+ "grad_norm": 1.4040454626083374,
1419
+ "learning_rate": 8.852969369022494e-05,
1420
+ "loss": 0.6014,
1421
+ "step": 1880
1422
+ },
1423
+ {
1424
+ "epoch": 0.23495773247140725,
1425
+ "grad_norm": 1.3731218576431274,
1426
+ "learning_rate": 8.816485380849613e-05,
1427
+ "loss": 0.7063,
1428
+ "step": 1890
1429
+ },
1430
+ {
1431
+ "epoch": 0.2362008950770761,
1432
+ "grad_norm": 1.5299303531646729,
1433
+ "learning_rate": 8.779895894514961e-05,
1434
+ "loss": 0.6177,
1435
+ "step": 1900
1436
+ },
1437
+ {
1438
+ "epoch": 0.2374440576827449,
1439
+ "grad_norm": 1.3770627975463867,
1440
+ "learning_rate": 8.743202414069011e-05,
1441
+ "loss": 0.6487,
1442
+ "step": 1910
1443
+ },
1444
+ {
1445
+ "epoch": 0.23868722028841372,
1446
+ "grad_norm": 1.1185230016708374,
1447
+ "learning_rate": 8.706406447837023e-05,
1448
+ "loss": 0.6612,
1449
+ "step": 1920
1450
+ },
1451
+ {
1452
+ "epoch": 0.23993038289408256,
1453
+ "grad_norm": 1.027255654335022,
1454
+ "learning_rate": 8.669509508357052e-05,
1455
+ "loss": 0.6125,
1456
+ "step": 1930
1457
+ },
1458
+ {
1459
+ "epoch": 0.24117354549975137,
1460
+ "grad_norm": 1.053480625152588,
1461
+ "learning_rate": 8.632513112317761e-05,
1462
+ "loss": 0.6614,
1463
+ "step": 1940
1464
+ },
1465
+ {
1466
+ "epoch": 0.24241670810542018,
1467
+ "grad_norm": 1.3678585290908813,
1468
+ "learning_rate": 8.59541878049609e-05,
1469
+ "loss": 0.5761,
1470
+ "step": 1950
1471
+ },
1472
+ {
1473
+ "epoch": 0.24241670810542018,
1474
+ "eval_loss": 0.8656662106513977,
1475
+ "eval_runtime": 56.0937,
1476
+ "eval_samples_per_second": 8.914,
1477
+ "eval_steps_per_second": 8.914,
1478
+ "step": 1950
1479
  }
1480
  ],
1481
  "logging_steps": 10,
 
1495
  "attributes": {}
1496
  }
1497
  },
1498
+ "total_flos": 6.767178816165888e+16,
1499
  "train_batch_size": 16,
1500
  "trial_name": null,
1501
  "trial_params": null