Training in progress, step 1956, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2503003904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e0a562a914d2be2b4ee279fe187629b69bd04971e3acab4f1c60939e5ec5996
|
3 |
size 2503003904
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5006244836
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9234c66d2cf43b48b78afca3f04cf7c13b9d3436c6cfdb169398c3ecd80cfe02
|
3 |
size 5006244836
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48c25ffa179744c0719c7b65566206a3ffbc025b1b73bf62d6945f9035c21dfa
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98f465f8ef34d3200760108c9ddb9bd27e97b140bce5b4d84a91de037dadb420
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -13391,6 +13391,314 @@
|
|
13391 |
"learning_rate": 1.3860803461989146e-07,
|
13392 |
"loss": 0.8676,
|
13393 |
"step": 1912
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13394 |
}
|
13395 |
],
|
13396 |
"logging_steps": 1,
|
@@ -13405,12 +13713,12 @@
|
|
13405 |
"should_evaluate": false,
|
13406 |
"should_log": false,
|
13407 |
"should_save": true,
|
13408 |
-
"should_training_stop":
|
13409 |
},
|
13410 |
"attributes": {}
|
13411 |
}
|
13412 |
},
|
13413 |
-
"total_flos": 7.
|
13414 |
"train_batch_size": 8,
|
13415 |
"trial_name": null,
|
13416 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1956,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
13391 |
"learning_rate": 1.3860803461989146e-07,
|
13392 |
"loss": 0.8676,
|
13393 |
"step": 1912
|
13394 |
+
},
|
13395 |
+
{
|
13396 |
+
"epoch": 0.9780163599182005,
|
13397 |
+
"grad_norm": 2.835423469543457,
|
13398 |
+
"learning_rate": 1.3238201455040844e-07,
|
13399 |
+
"loss": 0.8435,
|
13400 |
+
"step": 1913
|
13401 |
+
},
|
13402 |
+
{
|
13403 |
+
"epoch": 0.9785276073619632,
|
13404 |
+
"grad_norm": 3.2276833057403564,
|
13405 |
+
"learning_rate": 1.2629887148061792e-07,
|
13406 |
+
"loss": 0.9043,
|
13407 |
+
"step": 1914
|
13408 |
+
},
|
13409 |
+
{
|
13410 |
+
"epoch": 0.9790388548057259,
|
13411 |
+
"grad_norm": 3.260972261428833,
|
13412 |
+
"learning_rate": 1.203586228395004e-07,
|
13413 |
+
"loss": 0.9502,
|
13414 |
+
"step": 1915
|
13415 |
+
},
|
13416 |
+
{
|
13417 |
+
"epoch": 0.9795501022494888,
|
13418 |
+
"grad_norm": 3.3064229488372803,
|
13419 |
+
"learning_rate": 1.1456128564660273e-07,
|
13420 |
+
"loss": 0.9968,
|
13421 |
+
"step": 1916
|
13422 |
+
},
|
13423 |
+
{
|
13424 |
+
"epoch": 0.9800613496932515,
|
13425 |
+
"grad_norm": 3.274178981781006,
|
13426 |
+
"learning_rate": 1.0890687651203823e-07,
|
13427 |
+
"loss": 0.8302,
|
13428 |
+
"step": 1917
|
13429 |
+
},
|
13430 |
+
{
|
13431 |
+
"epoch": 0.9805725971370143,
|
13432 |
+
"grad_norm": 3.076536178588867,
|
13433 |
+
"learning_rate": 1.0339541163639776e-07,
|
13434 |
+
"loss": 0.9421,
|
13435 |
+
"step": 1918
|
13436 |
+
},
|
13437 |
+
{
|
13438 |
+
"epoch": 0.9810838445807771,
|
13439 |
+
"grad_norm": 3.247903823852539,
|
13440 |
+
"learning_rate": 9.802690681071647e-08,
|
13441 |
+
"loss": 0.9819,
|
13442 |
+
"step": 1919
|
13443 |
+
},
|
13444 |
+
{
|
13445 |
+
"epoch": 0.9815950920245399,
|
13446 |
+
"grad_norm": 3.3538260459899902,
|
13447 |
+
"learning_rate": 9.280137741643491e-08,
|
13448 |
+
"loss": 0.8744,
|
13449 |
+
"step": 1920
|
13450 |
+
},
|
13451 |
+
{
|
13452 |
+
"epoch": 0.9821063394683026,
|
13453 |
+
"grad_norm": 3.515782356262207,
|
13454 |
+
"learning_rate": 8.771883842536021e-08,
|
13455 |
+
"loss": 0.9124,
|
13456 |
+
"step": 1921
|
13457 |
+
},
|
13458 |
+
{
|
13459 |
+
"epoch": 0.9826175869120655,
|
13460 |
+
"grad_norm": 3.6226806640625,
|
13461 |
+
"learning_rate": 8.277930439959946e-08,
|
13462 |
+
"loss": 0.9011,
|
13463 |
+
"step": 1922
|
13464 |
+
},
|
13465 |
+
{
|
13466 |
+
"epoch": 0.9831288343558282,
|
13467 |
+
"grad_norm": 3.3394203186035156,
|
13468 |
+
"learning_rate": 7.798278949154303e-08,
|
13469 |
+
"loss": 0.8316,
|
13470 |
+
"step": 1923
|
13471 |
+
},
|
13472 |
+
{
|
13473 |
+
"epoch": 0.983640081799591,
|
13474 |
+
"grad_norm": 3.246371030807495,
|
13475 |
+
"learning_rate": 7.332930744380906e-08,
|
13476 |
+
"loss": 0.8556,
|
13477 |
+
"step": 1924
|
13478 |
+
},
|
13479 |
+
{
|
13480 |
+
"epoch": 0.9841513292433538,
|
13481 |
+
"grad_norm": 3.402927875518799,
|
13482 |
+
"learning_rate": 6.881887158920464e-08,
|
13483 |
+
"loss": 0.7978,
|
13484 |
+
"step": 1925
|
13485 |
+
},
|
13486 |
+
{
|
13487 |
+
"epoch": 0.9846625766871165,
|
13488 |
+
"grad_norm": 3.8112809658050537,
|
13489 |
+
"learning_rate": 6.445149485070357e-08,
|
13490 |
+
"loss": 0.9133,
|
13491 |
+
"step": 1926
|
13492 |
+
},
|
13493 |
+
{
|
13494 |
+
"epoch": 0.9851738241308794,
|
13495 |
+
"grad_norm": 3.5460119247436523,
|
13496 |
+
"learning_rate": 6.022718974137975e-08,
|
13497 |
+
"loss": 0.8158,
|
13498 |
+
"step": 1927
|
13499 |
+
},
|
13500 |
+
{
|
13501 |
+
"epoch": 0.9856850715746421,
|
13502 |
+
"grad_norm": 3.341395854949951,
|
13503 |
+
"learning_rate": 5.614596836440722e-08,
|
13504 |
+
"loss": 0.8246,
|
13505 |
+
"step": 1928
|
13506 |
+
},
|
13507 |
+
{
|
13508 |
+
"epoch": 0.9861963190184049,
|
13509 |
+
"grad_norm": 3.6873090267181396,
|
13510 |
+
"learning_rate": 5.2207842412999034e-08,
|
13511 |
+
"loss": 0.8714,
|
13512 |
+
"step": 1929
|
13513 |
+
},
|
13514 |
+
{
|
13515 |
+
"epoch": 0.9867075664621677,
|
13516 |
+
"grad_norm": 3.4815688133239746,
|
13517 |
+
"learning_rate": 4.841282317037399e-08,
|
13518 |
+
"loss": 0.8948,
|
13519 |
+
"step": 1930
|
13520 |
+
},
|
13521 |
+
{
|
13522 |
+
"epoch": 0.9872188139059305,
|
13523 |
+
"grad_norm": 3.5316038131713867,
|
13524 |
+
"learning_rate": 4.476092150975109e-08,
|
13525 |
+
"loss": 0.8622,
|
13526 |
+
"step": 1931
|
13527 |
+
},
|
13528 |
+
{
|
13529 |
+
"epoch": 0.9877300613496932,
|
13530 |
+
"grad_norm": 3.5975794792175293,
|
13531 |
+
"learning_rate": 4.1252147894277336e-08,
|
13532 |
+
"loss": 0.881,
|
13533 |
+
"step": 1932
|
13534 |
+
},
|
13535 |
+
{
|
13536 |
+
"epoch": 0.9882413087934561,
|
13537 |
+
"grad_norm": 3.441171646118164,
|
13538 |
+
"learning_rate": 3.7886512377033334e-08,
|
13539 |
+
"loss": 0.8396,
|
13540 |
+
"step": 1933
|
13541 |
+
},
|
13542 |
+
{
|
13543 |
+
"epoch": 0.9887525562372188,
|
13544 |
+
"grad_norm": 3.8511383533477783,
|
13545 |
+
"learning_rate": 3.4664024600988835e-08,
|
13546 |
+
"loss": 0.9208,
|
13547 |
+
"step": 1934
|
13548 |
+
},
|
13549 |
+
{
|
13550 |
+
"epoch": 0.9892638036809815,
|
13551 |
+
"grad_norm": 3.8687822818756104,
|
13552 |
+
"learning_rate": 3.158469379898055e-08,
|
13553 |
+
"loss": 0.9135,
|
13554 |
+
"step": 1935
|
13555 |
+
},
|
13556 |
+
{
|
13557 |
+
"epoch": 0.9897750511247444,
|
13558 |
+
"grad_norm": 3.593276023864746,
|
13559 |
+
"learning_rate": 2.8648528793673302e-08,
|
13560 |
+
"loss": 0.8474,
|
13561 |
+
"step": 1936
|
13562 |
+
},
|
13563 |
+
{
|
13564 |
+
"epoch": 0.9902862985685071,
|
13565 |
+
"grad_norm": 4.0986127853393555,
|
13566 |
+
"learning_rate": 2.5855537997548917e-08,
|
13567 |
+
"loss": 0.8883,
|
13568 |
+
"step": 1937
|
13569 |
+
},
|
13570 |
+
{
|
13571 |
+
"epoch": 0.99079754601227,
|
13572 |
+
"grad_norm": 4.03285551071167,
|
13573 |
+
"learning_rate": 2.3205729412884016e-08,
|
13574 |
+
"loss": 0.7779,
|
13575 |
+
"step": 1938
|
13576 |
+
},
|
13577 |
+
{
|
13578 |
+
"epoch": 0.9913087934560327,
|
13579 |
+
"grad_norm": 4.346153736114502,
|
13580 |
+
"learning_rate": 2.0699110631711148e-08,
|
13581 |
+
"loss": 0.8757,
|
13582 |
+
"step": 1939
|
13583 |
+
},
|
13584 |
+
{
|
13585 |
+
"epoch": 0.9918200408997955,
|
13586 |
+
"grad_norm": 4.283609390258789,
|
13587 |
+
"learning_rate": 1.8335688835802167e-08,
|
13588 |
+
"loss": 0.8173,
|
13589 |
+
"step": 1940
|
13590 |
+
},
|
13591 |
+
{
|
13592 |
+
"epoch": 0.9923312883435583,
|
13593 |
+
"grad_norm": 4.301876068115234,
|
13594 |
+
"learning_rate": 1.6115470796662647e-08,
|
13595 |
+
"loss": 0.9134,
|
13596 |
+
"step": 1941
|
13597 |
+
},
|
13598 |
+
{
|
13599 |
+
"epoch": 0.9928425357873211,
|
13600 |
+
"grad_norm": 4.885223865509033,
|
13601 |
+
"learning_rate": 1.4038462875504143e-08,
|
13602 |
+
"loss": 0.8289,
|
13603 |
+
"step": 1942
|
13604 |
+
},
|
13605 |
+
{
|
13606 |
+
"epoch": 0.9933537832310838,
|
13607 |
+
"grad_norm": 4.63042688369751,
|
13608 |
+
"learning_rate": 1.2104671023199787e-08,
|
13609 |
+
"loss": 0.8625,
|
13610 |
+
"step": 1943
|
13611 |
+
},
|
13612 |
+
{
|
13613 |
+
"epoch": 0.9938650306748467,
|
13614 |
+
"grad_norm": 4.702084064483643,
|
13615 |
+
"learning_rate": 1.0314100780317581e-08,
|
13616 |
+
"loss": 0.9342,
|
13617 |
+
"step": 1944
|
13618 |
+
},
|
13619 |
+
{
|
13620 |
+
"epoch": 0.9943762781186094,
|
13621 |
+
"grad_norm": 4.4585771560668945,
|
13622 |
+
"learning_rate": 8.666757277064897e-09,
|
13623 |
+
"loss": 0.6828,
|
13624 |
+
"step": 1945
|
13625 |
+
},
|
13626 |
+
{
|
13627 |
+
"epoch": 0.9948875255623721,
|
13628 |
+
"grad_norm": 4.869369029998779,
|
13629 |
+
"learning_rate": 7.162645233282916e-09,
|
13630 |
+
"loss": 0.8505,
|
13631 |
+
"step": 1946
|
13632 |
+
},
|
13633 |
+
{
|
13634 |
+
"epoch": 0.995398773006135,
|
13635 |
+
"grad_norm": 4.623004913330078,
|
13636 |
+
"learning_rate": 5.8017689584521915e-09,
|
13637 |
+
"loss": 0.6772,
|
13638 |
+
"step": 1947
|
13639 |
+
},
|
13640 |
+
{
|
13641 |
+
"epoch": 0.9959100204498977,
|
13642 |
+
"grad_norm": 5.718740940093994,
|
13643 |
+
"learning_rate": 4.584132351642678e-09,
|
13644 |
+
"loss": 0.8251,
|
13645 |
+
"step": 1948
|
13646 |
+
},
|
13647 |
+
{
|
13648 |
+
"epoch": 0.9964212678936605,
|
13649 |
+
"grad_norm": 5.196649551391602,
|
13650 |
+
"learning_rate": 3.509738901547044e-09,
|
13651 |
+
"loss": 0.6039,
|
13652 |
+
"step": 1949
|
13653 |
+
},
|
13654 |
+
{
|
13655 |
+
"epoch": 0.9969325153374233,
|
13656 |
+
"grad_norm": 6.253082752227783,
|
13657 |
+
"learning_rate": 2.5785916864307092e-09,
|
13658 |
+
"loss": 0.5829,
|
13659 |
+
"step": 1950
|
13660 |
+
},
|
13661 |
+
{
|
13662 |
+
"epoch": 0.9974437627811861,
|
13663 |
+
"grad_norm": 2.393533229827881,
|
13664 |
+
"learning_rate": 1.7906933741484999e-09,
|
13665 |
+
"loss": 0.869,
|
13666 |
+
"step": 1951
|
13667 |
+
},
|
13668 |
+
{
|
13669 |
+
"epoch": 0.9979550102249489,
|
13670 |
+
"grad_norm": 2.994673728942871,
|
13671 |
+
"learning_rate": 1.1460462221279944e-09,
|
13672 |
+
"loss": 0.9103,
|
13673 |
+
"step": 1952
|
13674 |
+
},
|
13675 |
+
{
|
13676 |
+
"epoch": 0.9984662576687117,
|
13677 |
+
"grad_norm": 3.5846893787384033,
|
13678 |
+
"learning_rate": 6.446520773695231e-10,
|
13679 |
+
"loss": 0.8366,
|
13680 |
+
"step": 1953
|
13681 |
+
},
|
13682 |
+
{
|
13683 |
+
"epoch": 0.9989775051124744,
|
13684 |
+
"grad_norm": 3.5298497676849365,
|
13685 |
+
"learning_rate": 2.8651237642396414e-10,
|
13686 |
+
"loss": 0.7694,
|
13687 |
+
"step": 1954
|
13688 |
+
},
|
13689 |
+
{
|
13690 |
+
"epoch": 0.9994887525562373,
|
13691 |
+
"grad_norm": 3.686530828475952,
|
13692 |
+
"learning_rate": 7.162814541494811e-11,
|
13693 |
+
"loss": 0.67,
|
13694 |
+
"step": 1955
|
13695 |
+
},
|
13696 |
+
{
|
13697 |
+
"epoch": 1.0,
|
13698 |
+
"grad_norm": 5.693774223327637,
|
13699 |
+
"learning_rate": 0.0,
|
13700 |
+
"loss": 0.7087,
|
13701 |
+
"step": 1956
|
13702 |
}
|
13703 |
],
|
13704 |
"logging_steps": 1,
|
|
|
13713 |
"should_evaluate": false,
|
13714 |
"should_log": false,
|
13715 |
"should_save": true,
|
13716 |
+
"should_training_stop": true
|
13717 |
},
|
13718 |
"attributes": {}
|
13719 |
}
|
13720 |
},
|
13721 |
+
"total_flos": 7.478583213010452e+17,
|
13722 |
"train_batch_size": 8,
|
13723 |
"trial_name": null,
|
13724 |
"trial_params": null
|