Upload folder using huggingface_hub

Files changed:
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1053 -3
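For reference, a minimal sketch of the kind of call that produces a commit like this one, using the huggingface_hub Python client named in the commit title. The repo id and folder path below are hypothetical placeholders, not taken from this commit:

from huggingface_hub import upload_folder

# Upload a local checkpoint folder to a model repo on the Hub in one commit.
# repo_id and folder_path are placeholders; authentication (huggingface-cli
# login or the HF_TOKEN env var) is assumed to be set up already.
upload_folder(
    repo_id="your-username/your-model",
    folder_path="./checkpoint-7300",
    commit_message="Upload folder using huggingface_hub",
)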
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:eb275cab79020aa09bccc6f79dff807df866b97778167594ac5838e0f3362fd7
 size 3999731048
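The binary files in this commit are stored as Git LFS pointer stubs: the repo tracks only the three-line pointer (spec version, sha256 oid, byte size) while the blob itself lives in LFS storage. A small sketch, assuming the file has been downloaded locally, of recomputing the two fields that such a pointer records:

import hashlib
from pathlib import Path

def lfs_pointer_fields(path: str) -> tuple[str, int]:
    """Return (sha256 hex digest, size in bytes) for a local file,
    i.e. the oid and size fields of its Git LFS pointer."""
    digest = hashlib.sha256()
    p = Path(path)
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest(), p.stat().st_size

# Hypothetical local check against the pointer above:
# oid, size = lfs_pointer_fields("adapter_model.safetensors")
# assert oid == "eb275cab79020aa09bccc6f79dff807df866b97778167594ac5838e0f3362fd7"
# assert size == 3999731048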
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5cb2e80d09d2cf618d21094241a4fba4cf2be757a9c0d04967e46edbadfa82b4
 size 2886855646
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d01ca6301d951d367a8578ffa373935e66fa172d3e43f478f697519b0d501c65
 size 14512
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ae6699db98de51cb8b8e21482aec6935e7411bb7e47e91f23409fa9f61b8faa2
 size 14512
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:805476656c48a00bdf96fb0a72ecaf54943cbaa2dccb831b117963fd087e3cbb
 size 1064
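optimizer.pt, scheduler.pt, and the per-process rng_state_*.pth files (two here, one per data-parallel process) are the standard transformers Trainer checkpoint artifacts that let a run resume exactly where it stopped. A minimal sketch, assuming a Trainer configured identically to the original run; the checkpoint path is a hypothetical placeholder:

# `trainer` is assumed to be a transformers.Trainer set up as in the
# original run. Resuming restores the optimizer, LR scheduler, and RNG
# states saved above; the directory name below is a placeholder.
trainer.train(resume_from_checkpoint="outputs/checkpoint-7300")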
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.5608481868469576,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 7300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24507,6 +24507,1056 @@
       "learning_rate": 1.7848801475107564e-05,
       "loss": 1.2568,
       "step": 7000
+    },
+    {
+      "epoch": 0.5379532882606023,
+      "grad_norm": 2.814363718032837,
+      "learning_rate": 1.784818684695759e-05,
+      "loss": 1.4887,
+      "step": 7002
+    },
+    {
+      "epoch": 0.5381069452980947,
+      "grad_norm": 2.9154648780822754,
+      "learning_rate": 1.7847572218807623e-05,
+      "loss": 1.1712,
+      "step": 7004
+    },
+    {
+      "epoch": 0.5382606023355869,
+      "grad_norm": 3.22001051902771,
+      "learning_rate": 1.7846957590657653e-05,
+      "loss": 1.3,
+      "step": 7006
+    },
+    {
+      "epoch": 0.5384142593730793,
+      "grad_norm": 3.288647174835205,
+      "learning_rate": 1.7846342962507682e-05,
+      "loss": 1.3765,
+      "step": 7008
+    },
+    {
+      "epoch": 0.5385679164105716,
+      "grad_norm": 3.0418827533721924,
+      "learning_rate": 1.7845728334357715e-05,
+      "loss": 1.454,
+      "step": 7010
+    },
+    {
+      "epoch": 0.5387215734480639,
+      "grad_norm": 2.8727383613586426,
+      "learning_rate": 1.7845113706207745e-05,
+      "loss": 1.3323,
+      "step": 7012
+    },
+    {
+      "epoch": 0.5388752304855562,
+      "grad_norm": 3.0164737701416016,
+      "learning_rate": 1.7844499078057778e-05,
+      "loss": 1.2651,
+      "step": 7014
+    },
+    {
+      "epoch": 0.5390288875230486,
+      "grad_norm": 2.6829867362976074,
+      "learning_rate": 1.7843884449907808e-05,
+      "loss": 1.3604,
+      "step": 7016
+    },
+    {
+      "epoch": 0.5391825445605408,
+      "grad_norm": 3.5748116970062256,
+      "learning_rate": 1.7843269821757837e-05,
+      "loss": 1.4137,
+      "step": 7018
+    },
+    {
+      "epoch": 0.5393362015980332,
+      "grad_norm": 2.9325039386749268,
+      "learning_rate": 1.784265519360787e-05,
+      "loss": 1.3057,
+      "step": 7020
+    },
+    {
+      "epoch": 0.5394898586355255,
+      "grad_norm": 2.7085931301116943,
+      "learning_rate": 1.78420405654579e-05,
+      "loss": 1.1975,
+      "step": 7022
+    },
+    {
+      "epoch": 0.5396435156730178,
+      "grad_norm": 2.880764961242676,
+      "learning_rate": 1.784142593730793e-05,
+      "loss": 1.35,
+      "step": 7024
+    },
+    {
+      "epoch": 0.5397971727105101,
+      "grad_norm": 3.0631964206695557,
+      "learning_rate": 1.7840811309157963e-05,
+      "loss": 1.2638,
+      "step": 7026
+    },
+    {
+      "epoch": 0.5399508297480025,
+      "grad_norm": 4.236493110656738,
+      "learning_rate": 1.784019668100799e-05,
+      "loss": 1.2436,
+      "step": 7028
+    },
+    {
+      "epoch": 0.5401044867854948,
+      "grad_norm": 2.839618682861328,
+      "learning_rate": 1.7839582052858022e-05,
+      "loss": 1.3736,
+      "step": 7030
+    },
+    {
+      "epoch": 0.5402581438229871,
+      "grad_norm": 3.1497669219970703,
+      "learning_rate": 1.7838967424708052e-05,
+      "loss": 1.4248,
+      "step": 7032
+    },
+    {
+      "epoch": 0.5404118008604795,
+      "grad_norm": 2.8350067138671875,
+      "learning_rate": 1.783835279655808e-05,
+      "loss": 1.2533,
+      "step": 7034
+    },
+    {
+      "epoch": 0.5405654578979717,
+      "grad_norm": 33.07500457763672,
+      "learning_rate": 1.7837738168408115e-05,
+      "loss": 1.4906,
+      "step": 7036
+    },
+    {
+      "epoch": 0.5407191149354641,
+      "grad_norm": 3.066897392272949,
+      "learning_rate": 1.7837123540258144e-05,
+      "loss": 1.4008,
+      "step": 7038
+    },
+    {
+      "epoch": 0.5408727719729564,
+      "grad_norm": 3.2352633476257324,
+      "learning_rate": 1.7836508912108178e-05,
+      "loss": 1.3084,
+      "step": 7040
+    },
+    {
+      "epoch": 0.5410264290104487,
+      "grad_norm": 3.37577486038208,
+      "learning_rate": 1.7835894283958207e-05,
+      "loss": 1.3395,
+      "step": 7042
+    },
+    {
+      "epoch": 0.541180086047941,
+      "grad_norm": 2.8560941219329834,
+      "learning_rate": 1.7835279655808237e-05,
+      "loss": 1.3559,
+      "step": 7044
+    },
+    {
+      "epoch": 0.5413337430854334,
+      "grad_norm": 3.0230565071105957,
+      "learning_rate": 1.783466502765827e-05,
+      "loss": 1.3866,
+      "step": 7046
+    },
+    {
+      "epoch": 0.5414874001229256,
+      "grad_norm": 2.6992247104644775,
+      "learning_rate": 1.78340503995083e-05,
+      "loss": 1.2888,
+      "step": 7048
+    },
+    {
+      "epoch": 0.541641057160418,
+      "grad_norm": 3.213545322418213,
+      "learning_rate": 1.783343577135833e-05,
+      "loss": 1.4569,
+      "step": 7050
+    },
+    {
+      "epoch": 0.5417947141979103,
+      "grad_norm": 3.045323610305786,
+      "learning_rate": 1.7832821143208362e-05,
+      "loss": 1.406,
+      "step": 7052
+    },
+    {
+      "epoch": 0.5419483712354026,
+      "grad_norm": 3.1526753902435303,
+      "learning_rate": 1.783220651505839e-05,
+      "loss": 1.4462,
+      "step": 7054
+    },
+    {
+      "epoch": 0.5421020282728949,
+      "grad_norm": 2.5964386463165283,
+      "learning_rate": 1.7831591886908422e-05,
+      "loss": 1.4212,
+      "step": 7056
+    },
+    {
+      "epoch": 0.5422556853103873,
+      "grad_norm": 3.107309341430664,
+      "learning_rate": 1.783097725875845e-05,
+      "loss": 1.2955,
+      "step": 7058
+    },
+    {
+      "epoch": 0.5424093423478795,
+      "grad_norm": 3.1672708988189697,
+      "learning_rate": 1.7830362630608485e-05,
+      "loss": 1.282,
+      "step": 7060
+    },
+    {
+      "epoch": 0.5425629993853719,
+      "grad_norm": 3.1407082080841064,
+      "learning_rate": 1.7829748002458514e-05,
+      "loss": 1.2697,
+      "step": 7062
+    },
+    {
+      "epoch": 0.5427166564228642,
+      "grad_norm": 2.588823080062866,
+      "learning_rate": 1.7829133374308544e-05,
+      "loss": 1.2566,
+      "step": 7064
+    },
+    {
+      "epoch": 0.5428703134603565,
+      "grad_norm": 2.709662914276123,
+      "learning_rate": 1.7828518746158577e-05,
+      "loss": 1.3653,
+      "step": 7066
+    },
+    {
+      "epoch": 0.5430239704978488,
+      "grad_norm": 2.963088274002075,
+      "learning_rate": 1.7827904118008607e-05,
+      "loss": 1.3215,
+      "step": 7068
+    },
+    {
+      "epoch": 0.5431776275353412,
+      "grad_norm": 3.0335934162139893,
+      "learning_rate": 1.7827289489858636e-05,
+      "loss": 1.4173,
+      "step": 7070
+    },
+    {
+      "epoch": 0.5433312845728334,
+      "grad_norm": 3.156770944595337,
+      "learning_rate": 1.782667486170867e-05,
+      "loss": 1.3679,
+      "step": 7072
+    },
+    {
+      "epoch": 0.5434849416103258,
+      "grad_norm": 3.3931353092193604,
+      "learning_rate": 1.78260602335587e-05,
+      "loss": 1.4272,
+      "step": 7074
+    },
+    {
+      "epoch": 0.543638598647818,
+      "grad_norm": 2.5168139934539795,
+      "learning_rate": 1.782544560540873e-05,
+      "loss": 1.3466,
+      "step": 7076
+    },
+    {
+      "epoch": 0.5437922556853104,
+      "grad_norm": 2.82444429397583,
+      "learning_rate": 1.7824830977258762e-05,
+      "loss": 1.3579,
+      "step": 7078
+    },
+    {
+      "epoch": 0.5439459127228027,
+      "grad_norm": 3.1184284687042236,
+      "learning_rate": 1.782421634910879e-05,
+      "loss": 1.4413,
+      "step": 7080
+    },
+    {
+      "epoch": 0.544099569760295,
+      "grad_norm": 3.5062296390533447,
+      "learning_rate": 1.782360172095882e-05,
+      "loss": 1.4573,
+      "step": 7082
+    },
+    {
+      "epoch": 0.5442532267977873,
+      "grad_norm": 3.173992395401001,
+      "learning_rate": 1.782298709280885e-05,
+      "loss": 1.3502,
+      "step": 7084
+    },
+    {
+      "epoch": 0.5444068838352797,
+      "grad_norm": 2.8979501724243164,
+      "learning_rate": 1.7822372464658884e-05,
+      "loss": 1.4154,
+      "step": 7086
+    },
+    {
+      "epoch": 0.544560540872772,
+      "grad_norm": 3.195020914077759,
+      "learning_rate": 1.7821757836508914e-05,
+      "loss": 1.408,
+      "step": 7088
+    },
+    {
+      "epoch": 0.5447141979102643,
+      "grad_norm": 2.883970022201538,
+      "learning_rate": 1.7821143208358943e-05,
+      "loss": 1.3705,
+      "step": 7090
+    },
+    {
+      "epoch": 0.5448678549477566,
+      "grad_norm": 3.1312525272369385,
+      "learning_rate": 1.7820528580208976e-05,
+      "loss": 1.3827,
+      "step": 7092
+    },
+    {
+      "epoch": 0.545021511985249,
+      "grad_norm": 2.855011224746704,
+      "learning_rate": 1.7819913952059006e-05,
+      "loss": 1.1705,
+      "step": 7094
+    },
+    {
+      "epoch": 0.5451751690227412,
+      "grad_norm": 2.7898807525634766,
+      "learning_rate": 1.7819299323909036e-05,
+      "loss": 1.2252,
+      "step": 7096
+    },
+    {
+      "epoch": 0.5453288260602336,
+      "grad_norm": 3.0114316940307617,
+      "learning_rate": 1.781868469575907e-05,
+      "loss": 1.3664,
+      "step": 7098
+    },
+    {
+      "epoch": 0.5454824830977258,
+      "grad_norm": 3.1177165508270264,
+      "learning_rate": 1.78180700676091e-05,
+      "loss": 1.4363,
+      "step": 7100
+    },
+    {
+      "epoch": 0.5456361401352182,
+      "grad_norm": 3.265270471572876,
+      "learning_rate": 1.7817455439459128e-05,
+      "loss": 1.4604,
+      "step": 7102
+    },
+    {
+      "epoch": 0.5457897971727105,
+      "grad_norm": 2.9632492065429688,
+      "learning_rate": 1.7816840811309158e-05,
+      "loss": 1.4497,
+      "step": 7104
+    },
+    {
+      "epoch": 0.5459434542102028,
+      "grad_norm": 2.721161127090454,
+      "learning_rate": 1.781622618315919e-05,
+      "loss": 1.3612,
+      "step": 7106
+    },
+    {
+      "epoch": 0.5460971112476951,
+      "grad_norm": 2.503848075866699,
+      "learning_rate": 1.781561155500922e-05,
+      "loss": 1.2765,
+      "step": 7108
+    },
+    {
+      "epoch": 0.5462507682851875,
+      "grad_norm": 3.5437700748443604,
+      "learning_rate": 1.781499692685925e-05,
+      "loss": 1.3824,
+      "step": 7110
+    },
+    {
+      "epoch": 0.5464044253226797,
+      "grad_norm": 2.859767436981201,
+      "learning_rate": 1.7814382298709283e-05,
+      "loss": 1.3918,
+      "step": 7112
+    },
+    {
+      "epoch": 0.5465580823601721,
+      "grad_norm": 2.664762496948242,
+      "learning_rate": 1.7813767670559313e-05,
+      "loss": 1.3302,
+      "step": 7114
+    },
+    {
+      "epoch": 0.5467117393976644,
+      "grad_norm": 2.8141744136810303,
+      "learning_rate": 1.7813153042409343e-05,
+      "loss": 1.2234,
+      "step": 7116
+    },
+    {
+      "epoch": 0.5468653964351567,
+      "grad_norm": 3.569658041000366,
+      "learning_rate": 1.7812538414259376e-05,
+      "loss": 1.3897,
+      "step": 7118
+    },
+    {
+      "epoch": 0.547019053472649,
+      "grad_norm": 2.765789747238159,
+      "learning_rate": 1.7811923786109406e-05,
+      "loss": 1.3527,
+      "step": 7120
+    },
+    {
+      "epoch": 0.5471727105101414,
+      "grad_norm": 3.1543784141540527,
+      "learning_rate": 1.7811309157959435e-05,
+      "loss": 1.3532,
+      "step": 7122
+    },
+    {
+      "epoch": 0.5473263675476336,
+      "grad_norm": 3.019174337387085,
+      "learning_rate": 1.7810694529809468e-05,
+      "loss": 1.3695,
+      "step": 7124
+    },
+    {
+      "epoch": 0.547480024585126,
+      "grad_norm": 3.103487968444824,
+      "learning_rate": 1.7810079901659498e-05,
+      "loss": 1.3873,
+      "step": 7126
+    },
+    {
+      "epoch": 0.5476336816226183,
+      "grad_norm": 2.756798505783081,
+      "learning_rate": 1.7809465273509528e-05,
+      "loss": 1.2963,
+      "step": 7128
+    },
+    {
+      "epoch": 0.5477873386601106,
+      "grad_norm": 3.0003297328948975,
+      "learning_rate": 1.7808850645359557e-05,
+      "loss": 1.3152,
+      "step": 7130
+    },
+    {
+      "epoch": 0.5479409956976029,
+      "grad_norm": 2.684039831161499,
+      "learning_rate": 1.780823601720959e-05,
+      "loss": 1.4226,
+      "step": 7132
+    },
+    {
+      "epoch": 0.5480946527350953,
+      "grad_norm": 2.9724183082580566,
+      "learning_rate": 1.780762138905962e-05,
+      "loss": 1.4356,
+      "step": 7134
+    },
+    {
+      "epoch": 0.5482483097725875,
+      "grad_norm": 2.8482649326324463,
+      "learning_rate": 1.780700676090965e-05,
+      "loss": 1.1933,
+      "step": 7136
+    },
+    {
+      "epoch": 0.5484019668100799,
+      "grad_norm": 3.3711745738983154,
+      "learning_rate": 1.7806392132759683e-05,
+      "loss": 1.3696,
+      "step": 7138
+    },
+    {
+      "epoch": 0.5485556238475722,
+      "grad_norm": 3.0896215438842773,
+      "learning_rate": 1.7805777504609713e-05,
+      "loss": 1.4287,
+      "step": 7140
+    },
+    {
+      "epoch": 0.5487092808850645,
+      "grad_norm": 3.777125120162964,
+      "learning_rate": 1.7805162876459742e-05,
+      "loss": 1.3143,
+      "step": 7142
+    },
+    {
+      "epoch": 0.5488629379225568,
+      "grad_norm": 3.1980645656585693,
+      "learning_rate": 1.7804548248309775e-05,
+      "loss": 1.3388,
+      "step": 7144
+    },
+    {
+      "epoch": 0.5490165949600492,
+      "grad_norm": 3.0263702869415283,
+      "learning_rate": 1.7803933620159805e-05,
+      "loss": 1.2497,
+      "step": 7146
+    },
+    {
+      "epoch": 0.5491702519975414,
+      "grad_norm": 3.010733127593994,
+      "learning_rate": 1.7803318992009835e-05,
+      "loss": 1.1814,
+      "step": 7148
+    },
+    {
+      "epoch": 0.5493239090350338,
+      "grad_norm": 2.8031928539276123,
+      "learning_rate": 1.7802704363859868e-05,
+      "loss": 1.251,
+      "step": 7150
+    },
+    {
+      "epoch": 0.5494775660725261,
+      "grad_norm": 3.2427356243133545,
+      "learning_rate": 1.7802089735709897e-05,
+      "loss": 1.3079,
+      "step": 7152
+    },
+    {
+      "epoch": 0.5496312231100184,
+      "grad_norm": 3.0464084148406982,
+      "learning_rate": 1.7801475107559927e-05,
+      "loss": 1.3989,
+      "step": 7154
+    },
+    {
+      "epoch": 0.5497848801475107,
+      "grad_norm": 2.847355604171753,
+      "learning_rate": 1.7800860479409957e-05,
+      "loss": 1.3637,
+      "step": 7156
+    },
+    {
+      "epoch": 0.5499385371850031,
+      "grad_norm": 3.1915597915649414,
+      "learning_rate": 1.780024585125999e-05,
+      "loss": 1.3317,
+      "step": 7158
+    },
+    {
+      "epoch": 0.5500921942224954,
+      "grad_norm": 3.0718259811401367,
+      "learning_rate": 1.779963122311002e-05,
+      "loss": 1.3284,
+      "step": 7160
+    },
+    {
+      "epoch": 0.5502458512599877,
+      "grad_norm": 3.015608310699463,
+      "learning_rate": 1.779901659496005e-05,
+      "loss": 1.3458,
+      "step": 7162
+    },
+    {
+      "epoch": 0.5503995082974801,
+      "grad_norm": 3.1199285984039307,
+      "learning_rate": 1.7798401966810082e-05,
+      "loss": 1.3407,
+      "step": 7164
+    },
+    {
+      "epoch": 0.5505531653349723,
+      "grad_norm": 3.2667245864868164,
+      "learning_rate": 1.7797787338660112e-05,
+      "loss": 1.3999,
+      "step": 7166
+    },
+    {
+      "epoch": 0.5507068223724647,
+      "grad_norm": 2.711768865585327,
+      "learning_rate": 1.779717271051014e-05,
+      "loss": 1.2374,
+      "step": 7168
+    },
+    {
+      "epoch": 0.550860479409957,
+      "grad_norm": 2.933833599090576,
+      "learning_rate": 1.7796558082360175e-05,
+      "loss": 1.2441,
+      "step": 7170
+    },
+    {
+      "epoch": 0.5510141364474493,
+      "grad_norm": 2.5497310161590576,
+      "learning_rate": 1.7795943454210204e-05,
+      "loss": 1.3304,
+      "step": 7172
+    },
+    {
+      "epoch": 0.5511677934849416,
+      "grad_norm": 2.6509580612182617,
+      "learning_rate": 1.7795328826060234e-05,
+      "loss": 1.3488,
+      "step": 7174
+    },
+    {
+      "epoch": 0.551321450522434,
+      "grad_norm": 2.720896005630493,
+      "learning_rate": 1.7794714197910267e-05,
+      "loss": 1.2818,
+      "step": 7176
+    },
+    {
+      "epoch": 0.5514751075599262,
+      "grad_norm": 3.513991355895996,
+      "learning_rate": 1.7794099569760297e-05,
+      "loss": 1.2761,
+      "step": 7178
+    },
+    {
+      "epoch": 0.5516287645974186,
+      "grad_norm": 3.5131587982177734,
+      "learning_rate": 1.7793484941610327e-05,
+      "loss": 1.4,
+      "step": 7180
+    },
+    {
+      "epoch": 0.5517824216349109,
+      "grad_norm": 3.281924247741699,
+      "learning_rate": 1.7792870313460356e-05,
+      "loss": 1.3619,
+      "step": 7182
+    },
+    {
+      "epoch": 0.5519360786724032,
+      "grad_norm": 2.9389426708221436,
+      "learning_rate": 1.779225568531039e-05,
+      "loss": 1.6219,
+      "step": 7184
+    },
+    {
+      "epoch": 0.5520897357098955,
+      "grad_norm": 2.482316493988037,
+      "learning_rate": 1.779164105716042e-05,
+      "loss": 1.2586,
+      "step": 7186
+    },
+    {
+      "epoch": 0.5522433927473879,
+      "grad_norm": 3.0932509899139404,
+      "learning_rate": 1.779102642901045e-05,
+      "loss": 1.3397,
+      "step": 7188
+    },
+    {
+      "epoch": 0.5523970497848801,
+      "grad_norm": 3.1350340843200684,
+      "learning_rate": 1.7790411800860482e-05,
+      "loss": 1.255,
+      "step": 7190
+    },
+    {
+      "epoch": 0.5525507068223725,
+      "grad_norm": 2.5633490085601807,
+      "learning_rate": 1.778979717271051e-05,
+      "loss": 1.1744,
+      "step": 7192
+    },
+    {
+      "epoch": 0.5527043638598648,
+      "grad_norm": 2.803712844848633,
+      "learning_rate": 1.778918254456054e-05,
+      "loss": 1.4449,
+      "step": 7194
+    },
+    {
+      "epoch": 0.5528580208973571,
+      "grad_norm": 2.5207178592681885,
+      "learning_rate": 1.7788567916410574e-05,
+      "loss": 1.4056,
+      "step": 7196
+    },
+    {
+      "epoch": 0.5530116779348494,
+      "grad_norm": 2.7499046325683594,
+      "learning_rate": 1.7787953288260604e-05,
+      "loss": 1.2398,
+      "step": 7198
+    },
+    {
+      "epoch": 0.5531653349723418,
+      "grad_norm": 3.0889878273010254,
+      "learning_rate": 1.7787338660110634e-05,
+      "loss": 1.4251,
+      "step": 7200
+    },
+    {
+      "epoch": 0.553318992009834,
+      "grad_norm": 2.5837948322296143,
+      "learning_rate": 1.7786724031960667e-05,
+      "loss": 1.345,
+      "step": 7202
+    },
+    {
+      "epoch": 0.5534726490473264,
+      "grad_norm": 3.2794241905212402,
+      "learning_rate": 1.7786109403810696e-05,
+      "loss": 1.4219,
+      "step": 7204
+    },
+    {
+      "epoch": 0.5536263060848187,
+      "grad_norm": 2.494079828262329,
+      "learning_rate": 1.778549477566073e-05,
+      "loss": 1.2795,
+      "step": 7206
+    },
+    {
+      "epoch": 0.553779963122311,
+      "grad_norm": 3.3279178142547607,
+      "learning_rate": 1.7784880147510756e-05,
+      "loss": 1.3046,
+      "step": 7208
+    },
+    {
+      "epoch": 0.5539336201598033,
+      "grad_norm": 2.7889404296875,
+      "learning_rate": 1.778426551936079e-05,
+      "loss": 1.2484,
+      "step": 7210
+    },
+    {
+      "epoch": 0.5540872771972957,
+      "grad_norm": 2.8925845623016357,
+      "learning_rate": 1.778365089121082e-05,
+      "loss": 1.2489,
+      "step": 7212
+    },
+    {
+      "epoch": 0.5542409342347879,
+      "grad_norm": 3.148367166519165,
+      "learning_rate": 1.7783036263060848e-05,
+      "loss": 1.4886,
+      "step": 7214
+    },
+    {
+      "epoch": 0.5543945912722803,
+      "grad_norm": 3.505509376525879,
+      "learning_rate": 1.778242163491088e-05,
+      "loss": 1.4575,
+      "step": 7216
+    },
+    {
+      "epoch": 0.5545482483097726,
+      "grad_norm": 2.391782760620117,
+      "learning_rate": 1.778180700676091e-05,
+      "loss": 1.2687,
+      "step": 7218
+    },
+    {
+      "epoch": 0.5547019053472649,
+      "grad_norm": 2.8791768550872803,
+      "learning_rate": 1.778119237861094e-05,
+      "loss": 1.4009,
+      "step": 7220
+    },
+    {
+      "epoch": 0.5548555623847572,
+      "grad_norm": 3.043830633163452,
+      "learning_rate": 1.7780577750460974e-05,
+      "loss": 1.3839,
+      "step": 7222
+    },
+    {
+      "epoch": 0.5550092194222496,
+      "grad_norm": 3.7112839221954346,
+      "learning_rate": 1.7779963122311003e-05,
+      "loss": 1.3653,
+      "step": 7224
+    },
+    {
+      "epoch": 0.5551628764597418,
+      "grad_norm": 2.8248283863067627,
+      "learning_rate": 1.7779348494161036e-05,
+      "loss": 1.3582,
+      "step": 7226
+    },
+    {
+      "epoch": 0.5553165334972342,
+      "grad_norm": 2.891206979751587,
+      "learning_rate": 1.7778733866011063e-05,
+      "loss": 1.2634,
+      "step": 7228
+    },
+    {
+      "epoch": 0.5554701905347265,
+      "grad_norm": 2.6088945865631104,
+      "learning_rate": 1.7778119237861096e-05,
+      "loss": 1.4858,
+      "step": 7230
+    },
+    {
+      "epoch": 0.5556238475722188,
+      "grad_norm": 3.5376429557800293,
+      "learning_rate": 1.7777504609711125e-05,
+      "loss": 1.3223,
+      "step": 7232
+    },
+    {
+      "epoch": 0.5557775046097111,
+      "grad_norm": 2.7143330574035645,
+      "learning_rate": 1.7776889981561155e-05,
+      "loss": 1.2609,
+      "step": 7234
+    },
+    {
+      "epoch": 0.5559311616472035,
+      "grad_norm": 2.7578842639923096,
+      "learning_rate": 1.7776275353411188e-05,
+      "loss": 1.3574,
+      "step": 7236
+    },
+    {
+      "epoch": 0.5560848186846957,
+      "grad_norm": 2.8860747814178467,
+      "learning_rate": 1.7775660725261218e-05,
+      "loss": 1.2957,
+      "step": 7238
+    },
+    {
+      "epoch": 0.5562384757221881,
+      "grad_norm": 3.3761367797851562,
+      "learning_rate": 1.7775046097111248e-05,
+      "loss": 1.396,
+      "step": 7240
+    },
+    {
+      "epoch": 0.5563921327596804,
+      "grad_norm": 2.8714826107025146,
+      "learning_rate": 1.777443146896128e-05,
+      "loss": 1.289,
+      "step": 7242
+    },
+    {
+      "epoch": 0.5565457897971727,
+      "grad_norm": 2.9392268657684326,
+      "learning_rate": 1.777381684081131e-05,
+      "loss": 1.3034,
+      "step": 7244
+    },
+    {
+      "epoch": 0.556699446834665,
+      "grad_norm": 2.9862611293792725,
+      "learning_rate": 1.7773202212661343e-05,
+      "loss": 1.2842,
+      "step": 7246
+    },
+    {
+      "epoch": 0.5568531038721574,
+      "grad_norm": 3.157655954360962,
+      "learning_rate": 1.7772587584511373e-05,
+      "loss": 1.3206,
+      "step": 7248
+    },
+    {
+      "epoch": 0.5570067609096496,
+      "grad_norm": 2.7796456813812256,
+      "learning_rate": 1.7771972956361403e-05,
+      "loss": 1.3415,
+      "step": 7250
+    },
+    {
+      "epoch": 0.557160417947142,
+      "grad_norm": 2.800544023513794,
+      "learning_rate": 1.7771358328211436e-05,
+      "loss": 1.3417,
+      "step": 7252
+    },
+    {
+      "epoch": 0.5573140749846343,
+      "grad_norm": 2.797663688659668,
+      "learning_rate": 1.7770743700061462e-05,
+      "loss": 1.2614,
+      "step": 7254
+    },
+    {
+      "epoch": 0.5574677320221266,
+      "grad_norm": 2.899327278137207,
+      "learning_rate": 1.7770129071911495e-05,
+      "loss": 1.4148,
+      "step": 7256
+    },
+    {
+      "epoch": 0.5576213890596189,
+      "grad_norm": 3.1357805728912354,
+      "learning_rate": 1.7769514443761525e-05,
+      "loss": 1.4304,
+      "step": 7258
+    },
+    {
+      "epoch": 0.5577750460971113,
+      "grad_norm": 2.704547643661499,
+      "learning_rate": 1.7768899815611555e-05,
+      "loss": 1.3199,
+      "step": 7260
+    },
+    {
+      "epoch": 0.5579287031346035,
+      "grad_norm": 3.0023064613342285,
+      "learning_rate": 1.7768285187461588e-05,
+      "loss": 1.3294,
+      "step": 7262
+    },
+    {
+      "epoch": 0.5580823601720959,
+      "grad_norm": 2.6803040504455566,
+      "learning_rate": 1.7767670559311617e-05,
+      "loss": 1.3632,
+      "step": 7264
+    },
+    {
+      "epoch": 0.5582360172095882,
+      "grad_norm": 3.395805597305298,
+      "learning_rate": 1.776705593116165e-05,
+      "loss": 1.513,
+      "step": 7266
+    },
+    {
+      "epoch": 0.5583896742470805,
+      "grad_norm": 3.188375949859619,
+      "learning_rate": 1.776644130301168e-05,
+      "loss": 1.2284,
+      "step": 7268
+    },
+    {
+      "epoch": 0.5585433312845728,
+      "grad_norm": 3.19944167137146,
+      "learning_rate": 1.776582667486171e-05,
+      "loss": 1.5306,
+      "step": 7270
+    },
+    {
+      "epoch": 0.5586969883220652,
+      "grad_norm": 3.132715940475464,
+      "learning_rate": 1.7765212046711743e-05,
+      "loss": 1.5381,
+      "step": 7272
+    },
+    {
+      "epoch": 0.5588506453595574,
+      "grad_norm": 2.777874231338501,
+      "learning_rate": 1.7764597418561772e-05,
+      "loss": 1.3303,
+      "step": 7274
+    },
+    {
+      "epoch": 0.5590043023970498,
+      "grad_norm": 3.418839931488037,
+      "learning_rate": 1.7763982790411802e-05,
+      "loss": 1.3595,
+      "step": 7276
+    },
+    {
+      "epoch": 0.559157959434542,
+      "grad_norm": 3.0690410137176514,
+      "learning_rate": 1.7763368162261835e-05,
+      "loss": 1.3164,
+      "step": 7278
+    },
+    {
+      "epoch": 0.5593116164720344,
+      "grad_norm": 3.341252326965332,
+      "learning_rate": 1.776275353411186e-05,
+      "loss": 1.4019,
+      "step": 7280
+    },
+    {
+      "epoch": 0.5594652735095267,
+      "grad_norm": 2.947756052017212,
+      "learning_rate": 1.7762138905961895e-05,
+      "loss": 1.1799,
+      "step": 7282
+    },
+    {
+      "epoch": 0.5596189305470191,
+      "grad_norm": 3.1042239665985107,
+      "learning_rate": 1.7761524277811924e-05,
+      "loss": 1.3689,
+      "step": 7284
+    },
+    {
+      "epoch": 0.5597725875845113,
+      "grad_norm": 3.0481724739074707,
+      "learning_rate": 1.7760909649661957e-05,
+      "loss": 1.3483,
+      "step": 7286
+    },
+    {
+      "epoch": 0.5599262446220037,
+      "grad_norm": 3.041898250579834,
+      "learning_rate": 1.7760295021511987e-05,
+      "loss": 1.3574,
+      "step": 7288
+    },
+    {
+      "epoch": 0.5600799016594961,
+      "grad_norm": 2.627877712249756,
+      "learning_rate": 1.7759680393362017e-05,
+      "loss": 1.3905,
+      "step": 7290
+    },
+    {
+      "epoch": 0.5602335586969883,
+      "grad_norm": 2.628779649734497,
+      "learning_rate": 1.775906576521205e-05,
+      "loss": 1.3318,
+      "step": 7292
+    },
+    {
+      "epoch": 0.5603872157344807,
+      "grad_norm": 2.788853406906128,
+      "learning_rate": 1.775845113706208e-05,
+      "loss": 1.3654,
+      "step": 7294
+    },
+    {
+      "epoch": 0.560540872771973,
+      "grad_norm": 2.898160696029663,
+      "learning_rate": 1.775783650891211e-05,
+      "loss": 1.3316,
+      "step": 7296
+    },
+    {
+      "epoch": 0.5606945298094653,
+      "grad_norm": 3.0796875953674316,
+      "learning_rate": 1.7757221880762142e-05,
+      "loss": 1.2504,
+      "step": 7298
+    },
+    {
+      "epoch": 0.5608481868469576,
+      "grad_norm": 2.4467694759368896,
+      "learning_rate": 1.7756607252612172e-05,
+      "loss": 1.2919,
+      "step": 7300
     }
   ],
   "logging_steps": 2,
@@ -24526,7 +25576,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.6598612105927066e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
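Since trainer_state.json carries the full log history, the logged loss curve can be pulled straight out of it. A short, purely illustrative sketch, assuming the file sits in the current directory:

import json

# Load the state written by transformers.Trainer and extract the
# (step, loss) pairs logged every `logging_steps` optimizer steps.
with open("trainer_state.json") as f:
    state = json.load(f)

losses = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
print(f"epoch {state['epoch']:.3f}, global step {state['global_step']}")
print("last logged loss:", losses[-1])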