Training in progress, step 1950, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 69527352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e3b8ef376785e9cafb621fb5a7157d2bb4e3e92e06716e4f79376be8ddd8a67
|
3 |
size 69527352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 139313554
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb707caa92720c742f6b5057c64ce456f97a0a305c683597e96d6c49c325b1bc
|
3 |
size 139313554
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fbad829d483b339a26e87847aafc349f9b5f5fa2f69baeccc578f19a8c8e5cf
|
3 |
size 14308
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dab5dfc5f8a218f5b5332643f6da5beb50a0e7236abc5407e39e2467ad257ef9
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1363,6 +1363,119 @@
|
|
1363 |
"eval_samples_per_second": 8.902,
|
1364 |
"eval_steps_per_second": 8.902,
|
1365 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1366 |
}
|
1367 |
],
|
1368 |
"logging_steps": 10,
|
@@ -1382,7 +1495,7 @@
|
|
1382 |
"attributes": {}
|
1383 |
}
|
1384 |
},
|
1385 |
-
"total_flos": 6.
|
1386 |
"train_batch_size": 16,
|
1387 |
"trial_name": null,
|
1388 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8656662106513977,
|
3 |
+
"best_model_checkpoint": "./output/checkpoint-1950",
|
4 |
+
"epoch": 0.24241670810542018,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 1950,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1363 |
"eval_samples_per_second": 8.902,
|
1364 |
"eval_steps_per_second": 8.902,
|
1365 |
"step": 1800
|
1366 |
+
},
|
1367 |
+
{
|
1368 |
+
"epoch": 0.2250124316260567,
|
1369 |
+
"grad_norm": 1.6383726596832275,
|
1370 |
+
"learning_rate": 9.105277923649698e-05,
|
1371 |
+
"loss": 0.6029,
|
1372 |
+
"step": 1810
|
1373 |
+
},
|
1374 |
+
{
|
1375 |
+
"epoch": 0.2262555942317255,
|
1376 |
+
"grad_norm": 0.863681435585022,
|
1377 |
+
"learning_rate": 9.06957416400209e-05,
|
1378 |
+
"loss": 0.6647,
|
1379 |
+
"step": 1820
|
1380 |
+
},
|
1381 |
+
{
|
1382 |
+
"epoch": 0.22749875683739434,
|
1383 |
+
"grad_norm": 1.0826516151428223,
|
1384 |
+
"learning_rate": 9.03375450270412e-05,
|
1385 |
+
"loss": 0.6775,
|
1386 |
+
"step": 1830
|
1387 |
+
},
|
1388 |
+
{
|
1389 |
+
"epoch": 0.22874191944306316,
|
1390 |
+
"grad_norm": 1.2367980480194092,
|
1391 |
+
"learning_rate": 8.997820412161764e-05,
|
1392 |
+
"loss": 0.7778,
|
1393 |
+
"step": 1840
|
1394 |
+
},
|
1395 |
+
{
|
1396 |
+
"epoch": 0.22998508204873197,
|
1397 |
+
"grad_norm": 1.4348937273025513,
|
1398 |
+
"learning_rate": 8.961773369484738e-05,
|
1399 |
+
"loss": 0.6699,
|
1400 |
+
"step": 1850
|
1401 |
+
},
|
1402 |
+
{
|
1403 |
+
"epoch": 0.23122824465440078,
|
1404 |
+
"grad_norm": 0.9706162810325623,
|
1405 |
+
"learning_rate": 8.925614856425786e-05,
|
1406 |
+
"loss": 0.684,
|
1407 |
+
"step": 1860
|
1408 |
+
},
|
1409 |
+
{
|
1410 |
+
"epoch": 0.23247140726006962,
|
1411 |
+
"grad_norm": 1.4127984046936035,
|
1412 |
+
"learning_rate": 8.88934635931975e-05,
|
1413 |
+
"loss": 0.6667,
|
1414 |
+
"step": 1870
|
1415 |
+
},
|
1416 |
+
{
|
1417 |
+
"epoch": 0.23371456986573844,
|
1418 |
+
"grad_norm": 1.4040454626083374,
|
1419 |
+
"learning_rate": 8.852969369022494e-05,
|
1420 |
+
"loss": 0.6014,
|
1421 |
+
"step": 1880
|
1422 |
+
},
|
1423 |
+
{
|
1424 |
+
"epoch": 0.23495773247140725,
|
1425 |
+
"grad_norm": 1.3731218576431274,
|
1426 |
+
"learning_rate": 8.816485380849613e-05,
|
1427 |
+
"loss": 0.7063,
|
1428 |
+
"step": 1890
|
1429 |
+
},
|
1430 |
+
{
|
1431 |
+
"epoch": 0.2362008950770761,
|
1432 |
+
"grad_norm": 1.5299303531646729,
|
1433 |
+
"learning_rate": 8.779895894514961e-05,
|
1434 |
+
"loss": 0.6177,
|
1435 |
+
"step": 1900
|
1436 |
+
},
|
1437 |
+
{
|
1438 |
+
"epoch": 0.2374440576827449,
|
1439 |
+
"grad_norm": 1.3770627975463867,
|
1440 |
+
"learning_rate": 8.743202414069011e-05,
|
1441 |
+
"loss": 0.6487,
|
1442 |
+
"step": 1910
|
1443 |
+
},
|
1444 |
+
{
|
1445 |
+
"epoch": 0.23868722028841372,
|
1446 |
+
"grad_norm": 1.1185230016708374,
|
1447 |
+
"learning_rate": 8.706406447837023e-05,
|
1448 |
+
"loss": 0.6612,
|
1449 |
+
"step": 1920
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"epoch": 0.23993038289408256,
|
1453 |
+
"grad_norm": 1.027255654335022,
|
1454 |
+
"learning_rate": 8.669509508357052e-05,
|
1455 |
+
"loss": 0.6125,
|
1456 |
+
"step": 1930
|
1457 |
+
},
|
1458 |
+
{
|
1459 |
+
"epoch": 0.24117354549975137,
|
1460 |
+
"grad_norm": 1.053480625152588,
|
1461 |
+
"learning_rate": 8.632513112317761e-05,
|
1462 |
+
"loss": 0.6614,
|
1463 |
+
"step": 1940
|
1464 |
+
},
|
1465 |
+
{
|
1466 |
+
"epoch": 0.24241670810542018,
|
1467 |
+
"grad_norm": 1.3678585290908813,
|
1468 |
+
"learning_rate": 8.59541878049609e-05,
|
1469 |
+
"loss": 0.5761,
|
1470 |
+
"step": 1950
|
1471 |
+
},
|
1472 |
+
{
|
1473 |
+
"epoch": 0.24241670810542018,
|
1474 |
+
"eval_loss": 0.8656662106513977,
|
1475 |
+
"eval_runtime": 56.0937,
|
1476 |
+
"eval_samples_per_second": 8.914,
|
1477 |
+
"eval_steps_per_second": 8.914,
|
1478 |
+
"step": 1950
|
1479 |
}
|
1480 |
],
|
1481 |
"logging_steps": 10,
|
|
|
1495 |
"attributes": {}
|
1496 |
}
|
1497 |
},
|
1498 |
+
"total_flos": 6.767178816165888e+16,
|
1499 |
"train_batch_size": 16,
|
1500 |
"trial_name": null,
|
1501 |
"trial_params": null
|