Training in progress, step 207, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1101095848
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31fbb834535d30942ff8926c5ac856548f98ca3a71f1c2f7d371cb8f822d0e3c
|
3 |
size 1101095848
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 841204242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d315cdc1640f0b8c81f2afdc924fde254c2c741910745b9daca663b83e61a59f
|
3 |
size 841204242
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21bf96c648c2b81637c2a374c88eb7bd6aaef1de82d55c601d0b411131031f36
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0398efb9f2d009f44e4675efc73a4fa2f0e6d741b98fe4c59c94a120cfb58052
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 23,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1367,6 +1367,175 @@
|
|
1367 |
"eval_samples_per_second": 0.365,
|
1368 |
"eval_steps_per_second": 0.365,
|
1369 |
"step": 184
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1370 |
}
|
1371 |
],
|
1372 |
"logging_steps": 1,
|
@@ -1386,7 +1555,7 @@
|
|
1386 |
"attributes": {}
|
1387 |
}
|
1388 |
},
|
1389 |
-
"total_flos": 1.
|
1390 |
"train_batch_size": 8,
|
1391 |
"trial_name": null,
|
1392 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9078947368421053,
|
5 |
"eval_steps": 23,
|
6 |
+
"global_step": 207,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1367 |
"eval_samples_per_second": 0.365,
|
1368 |
"eval_steps_per_second": 0.365,
|
1369 |
"step": 184
|
1370 |
+
},
|
1371 |
+
{
|
1372 |
+
"epoch": 0.8114035087719298,
|
1373 |
+
"grad_norm": 0.04292619228363037,
|
1374 |
+
"learning_rate": 9.183446957694048e-06,
|
1375 |
+
"loss": 2.225,
|
1376 |
+
"step": 185
|
1377 |
+
},
|
1378 |
+
{
|
1379 |
+
"epoch": 0.8157894736842105,
|
1380 |
+
"grad_norm": 0.04302488639950752,
|
1381 |
+
"learning_rate": 8.997043104258856e-06,
|
1382 |
+
"loss": 2.0675,
|
1383 |
+
"step": 186
|
1384 |
+
},
|
1385 |
+
{
|
1386 |
+
"epoch": 0.8201754385964912,
|
1387 |
+
"grad_norm": 0.04218915104866028,
|
1388 |
+
"learning_rate": 8.814481808360945e-06,
|
1389 |
+
"loss": 2.1778,
|
1390 |
+
"step": 187
|
1391 |
+
},
|
1392 |
+
{
|
1393 |
+
"epoch": 0.8245614035087719,
|
1394 |
+
"grad_norm": 0.04348418116569519,
|
1395 |
+
"learning_rate": 8.635800982982958e-06,
|
1396 |
+
"loss": 2.2598,
|
1397 |
+
"step": 188
|
1398 |
+
},
|
1399 |
+
{
|
1400 |
+
"epoch": 0.8289473684210527,
|
1401 |
+
"grad_norm": 0.04379533231258392,
|
1402 |
+
"learning_rate": 8.461037735240047e-06,
|
1403 |
+
"loss": 2.223,
|
1404 |
+
"step": 189
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"epoch": 0.8333333333333334,
|
1408 |
+
"grad_norm": 0.04641556367278099,
|
1409 |
+
"learning_rate": 8.290228358673758e-06,
|
1410 |
+
"loss": 2.1633,
|
1411 |
+
"step": 190
|
1412 |
+
},
|
1413 |
+
{
|
1414 |
+
"epoch": 0.8377192982456141,
|
1415 |
+
"grad_norm": 0.04623427614569664,
|
1416 |
+
"learning_rate": 8.123408325714857e-06,
|
1417 |
+
"loss": 2.2546,
|
1418 |
+
"step": 191
|
1419 |
+
},
|
1420 |
+
{
|
1421 |
+
"epoch": 0.8421052631578947,
|
1422 |
+
"grad_norm": 0.04348750412464142,
|
1423 |
+
"learning_rate": 7.960612280316673e-06,
|
1424 |
+
"loss": 2.2283,
|
1425 |
+
"step": 192
|
1426 |
+
},
|
1427 |
+
{
|
1428 |
+
"epoch": 0.8464912280701754,
|
1429 |
+
"grad_norm": 0.04299633204936981,
|
1430 |
+
"learning_rate": 7.801874030760472e-06,
|
1431 |
+
"loss": 2.2155,
|
1432 |
+
"step": 193
|
1433 |
+
},
|
1434 |
+
{
|
1435 |
+
"epoch": 0.8508771929824561,
|
1436 |
+
"grad_norm": 0.04249183461070061,
|
1437 |
+
"learning_rate": 7.647226542634454e-06,
|
1438 |
+
"loss": 2.2647,
|
1439 |
+
"step": 194
|
1440 |
+
},
|
1441 |
+
{
|
1442 |
+
"epoch": 0.8552631578947368,
|
1443 |
+
"grad_norm": 0.04467320442199707,
|
1444 |
+
"learning_rate": 7.49670193198766e-06,
|
1445 |
+
"loss": 2.3202,
|
1446 |
+
"step": 195
|
1447 |
+
},
|
1448 |
+
{
|
1449 |
+
"epoch": 0.8596491228070176,
|
1450 |
+
"grad_norm": 0.04538441821932793,
|
1451 |
+
"learning_rate": 7.350331458660367e-06,
|
1452 |
+
"loss": 2.0542,
|
1453 |
+
"step": 196
|
1454 |
+
},
|
1455 |
+
{
|
1456 |
+
"epoch": 0.8640350877192983,
|
1457 |
+
"grad_norm": 0.04282210022211075,
|
1458 |
+
"learning_rate": 7.208145519792266e-06,
|
1459 |
+
"loss": 2.3344,
|
1460 |
+
"step": 197
|
1461 |
+
},
|
1462 |
+
{
|
1463 |
+
"epoch": 0.868421052631579,
|
1464 |
+
"grad_norm": 0.042627353221178055,
|
1465 |
+
"learning_rate": 7.0701736435098155e-06,
|
1466 |
+
"loss": 2.3739,
|
1467 |
+
"step": 198
|
1468 |
+
},
|
1469 |
+
{
|
1470 |
+
"epoch": 0.8728070175438597,
|
1471 |
+
"grad_norm": 0.04885130748152733,
|
1472 |
+
"learning_rate": 6.936444482794065e-06,
|
1473 |
+
"loss": 2.2614,
|
1474 |
+
"step": 199
|
1475 |
+
},
|
1476 |
+
{
|
1477 |
+
"epoch": 0.8771929824561403,
|
1478 |
+
"grad_norm": 0.04192091524600983,
|
1479 |
+
"learning_rate": 6.806985809530189e-06,
|
1480 |
+
"loss": 2.0821,
|
1481 |
+
"step": 200
|
1482 |
+
},
|
1483 |
+
{
|
1484 |
+
"epoch": 0.881578947368421,
|
1485 |
+
"grad_norm": 0.04542316868901253,
|
1486 |
+
"learning_rate": 6.6818245087400574e-06,
|
1487 |
+
"loss": 2.3226,
|
1488 |
+
"step": 201
|
1489 |
+
},
|
1490 |
+
{
|
1491 |
+
"epoch": 0.8859649122807017,
|
1492 |
+
"grad_norm": 0.0446937195956707,
|
1493 |
+
"learning_rate": 6.56098657299893e-06,
|
1494 |
+
"loss": 2.342,
|
1495 |
+
"step": 202
|
1496 |
+
},
|
1497 |
+
{
|
1498 |
+
"epoch": 0.8903508771929824,
|
1499 |
+
"grad_norm": 0.04320209473371506,
|
1500 |
+
"learning_rate": 6.444497097037532e-06,
|
1501 |
+
"loss": 2.1945,
|
1502 |
+
"step": 203
|
1503 |
+
},
|
1504 |
+
{
|
1505 |
+
"epoch": 0.8947368421052632,
|
1506 |
+
"grad_norm": 0.04684532806277275,
|
1507 |
+
"learning_rate": 6.332380272530536e-06,
|
1508 |
+
"loss": 2.2744,
|
1509 |
+
"step": 204
|
1510 |
+
},
|
1511 |
+
{
|
1512 |
+
"epoch": 0.8991228070175439,
|
1513 |
+
"grad_norm": 0.04657423868775368,
|
1514 |
+
"learning_rate": 6.224659383072649e-06,
|
1515 |
+
"loss": 2.1249,
|
1516 |
+
"step": 205
|
1517 |
+
},
|
1518 |
+
{
|
1519 |
+
"epoch": 0.9035087719298246,
|
1520 |
+
"grad_norm": 0.04765097796916962,
|
1521 |
+
"learning_rate": 6.1213567993432085e-06,
|
1522 |
+
"loss": 2.1456,
|
1523 |
+
"step": 206
|
1524 |
+
},
|
1525 |
+
{
|
1526 |
+
"epoch": 0.9078947368421053,
|
1527 |
+
"grad_norm": 0.047186579555273056,
|
1528 |
+
"learning_rate": 6.022493974460447e-06,
|
1529 |
+
"loss": 2.2972,
|
1530 |
+
"step": 207
|
1531 |
+
},
|
1532 |
+
{
|
1533 |
+
"epoch": 0.9078947368421053,
|
1534 |
+
"eval_loss": 2.163572311401367,
|
1535 |
+
"eval_runtime": 218.9467,
|
1536 |
+
"eval_samples_per_second": 0.365,
|
1537 |
+
"eval_steps_per_second": 0.365,
|
1538 |
+
"step": 207
|
1539 |
}
|
1540 |
],
|
1541 |
"logging_steps": 1,
|
|
|
1555 |
"attributes": {}
|
1556 |
}
|
1557 |
},
|
1558 |
+
"total_flos": 1.161249603094315e+18,
|
1559 |
"train_batch_size": 8,
|
1560 |
"trial_name": null,
|
1561 |
"trial_params": null
|