Training in progress, step 3346, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1676 -3
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 516810008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e61158bae1b1d8ead7489b9d1611fe4d686eced20ba956f07db0ff0b8874833
|
3 |
size 516810008
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e6770959206cdc4982812252fd247c3bc6b12852c762625cdd31f5ebfabb90c
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c223764a53f3c079e3e1a5a08b73492d4dbd90d36099e60b51596323355bcb0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -21764,6 +21764,1679 @@
|
|
21764 |
"learning_rate": 2.406622520140733e-06,
|
21765 |
"loss": 0.0,
|
21766 |
"step": 3107
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21767 |
}
|
21768 |
],
|
21769 |
"logging_steps": 1,
|
@@ -21783,7 +23456,7 @@
|
|
21783 |
"attributes": {}
|
21784 |
}
|
21785 |
},
|
21786 |
-
"total_flos":
|
21787 |
"train_batch_size": 2,
|
21788 |
"trial_name": null,
|
21789 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9461974698269593,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3346,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
21764 |
"learning_rate": 2.406622520140733e-06,
|
21765 |
"loss": 0.0,
|
21766 |
"step": 3107
|
21767 |
+
},
|
21768 |
+
{
|
21769 |
+
"epoch": 1.807765013814163,
|
21770 |
+
"grad_norm": NaN,
|
21771 |
+
"learning_rate": 2.3922198670246587e-06,
|
21772 |
+
"loss": 0.0,
|
21773 |
+
"step": 3108
|
21774 |
+
},
|
21775 |
+
{
|
21776 |
+
"epoch": 1.8083466627890068,
|
21777 |
+
"grad_norm": NaN,
|
21778 |
+
"learning_rate": 2.3778593840424967e-06,
|
21779 |
+
"loss": 0.0,
|
21780 |
+
"step": 3109
|
21781 |
+
},
|
21782 |
+
{
|
21783 |
+
"epoch": 1.8089283117638506,
|
21784 |
+
"grad_norm": NaN,
|
21785 |
+
"learning_rate": 2.363541083914528e-06,
|
21786 |
+
"loss": 0.0,
|
21787 |
+
"step": 3110
|
21788 |
+
},
|
21789 |
+
{
|
21790 |
+
"epoch": 1.8095099607386942,
|
21791 |
+
"grad_norm": NaN,
|
21792 |
+
"learning_rate": 2.3492649793236397e-06,
|
21793 |
+
"loss": 0.0,
|
21794 |
+
"step": 3111
|
21795 |
+
},
|
21796 |
+
{
|
21797 |
+
"epoch": 1.8100916097135378,
|
21798 |
+
"grad_norm": NaN,
|
21799 |
+
"learning_rate": 2.335031082915351e-06,
|
21800 |
+
"loss": 0.0,
|
21801 |
+
"step": 3112
|
21802 |
+
},
|
21803 |
+
{
|
21804 |
+
"epoch": 1.8106732586883816,
|
21805 |
+
"grad_norm": NaN,
|
21806 |
+
"learning_rate": 2.3208394072978034e-06,
|
21807 |
+
"loss": 0.0,
|
21808 |
+
"step": 3113
|
21809 |
+
},
|
21810 |
+
{
|
21811 |
+
"epoch": 1.8112549076632254,
|
21812 |
+
"grad_norm": NaN,
|
21813 |
+
"learning_rate": 2.3066899650417306e-06,
|
21814 |
+
"loss": 0.0,
|
21815 |
+
"step": 3114
|
21816 |
+
},
|
21817 |
+
{
|
21818 |
+
"epoch": 1.811836556638069,
|
21819 |
+
"grad_norm": NaN,
|
21820 |
+
"learning_rate": 2.2925827686804568e-06,
|
21821 |
+
"loss": 0.0,
|
21822 |
+
"step": 3115
|
21823 |
+
},
|
21824 |
+
{
|
21825 |
+
"epoch": 1.8124182056129126,
|
21826 |
+
"grad_norm": NaN,
|
21827 |
+
"learning_rate": 2.278517830709892e-06,
|
21828 |
+
"loss": 0.0,
|
21829 |
+
"step": 3116
|
21830 |
+
},
|
21831 |
+
{
|
21832 |
+
"epoch": 1.8129998545877561,
|
21833 |
+
"grad_norm": NaN,
|
21834 |
+
"learning_rate": 2.264495163588509e-06,
|
21835 |
+
"loss": 0.0,
|
21836 |
+
"step": 3117
|
21837 |
+
},
|
21838 |
+
{
|
21839 |
+
"epoch": 1.8135815035626,
|
21840 |
+
"grad_norm": NaN,
|
21841 |
+
"learning_rate": 2.2505147797373395e-06,
|
21842 |
+
"loss": 0.0,
|
21843 |
+
"step": 3118
|
21844 |
+
},
|
21845 |
+
{
|
21846 |
+
"epoch": 1.8141631525374438,
|
21847 |
+
"grad_norm": NaN,
|
21848 |
+
"learning_rate": 2.23657669153996e-06,
|
21849 |
+
"loss": 0.0,
|
21850 |
+
"step": 3119
|
21851 |
+
},
|
21852 |
+
{
|
21853 |
+
"epoch": 1.8147448015122873,
|
21854 |
+
"grad_norm": NaN,
|
21855 |
+
"learning_rate": 2.2226809113424895e-06,
|
21856 |
+
"loss": 0.0,
|
21857 |
+
"step": 3120
|
21858 |
+
},
|
21859 |
+
{
|
21860 |
+
"epoch": 1.815326450487131,
|
21861 |
+
"grad_norm": NaN,
|
21862 |
+
"learning_rate": 2.20882745145356e-06,
|
21863 |
+
"loss": 0.0,
|
21864 |
+
"step": 3121
|
21865 |
+
},
|
21866 |
+
{
|
21867 |
+
"epoch": 1.8159080994619747,
|
21868 |
+
"grad_norm": NaN,
|
21869 |
+
"learning_rate": 2.1950163241443277e-06,
|
21870 |
+
"loss": 0.0,
|
21871 |
+
"step": 3122
|
21872 |
+
},
|
21873 |
+
{
|
21874 |
+
"epoch": 1.8164897484368185,
|
21875 |
+
"grad_norm": NaN,
|
21876 |
+
"learning_rate": 2.1812475416484456e-06,
|
21877 |
+
"loss": 0.0,
|
21878 |
+
"step": 3123
|
21879 |
+
},
|
21880 |
+
{
|
21881 |
+
"epoch": 1.8170713974116621,
|
21882 |
+
"grad_norm": NaN,
|
21883 |
+
"learning_rate": 2.167521116162052e-06,
|
21884 |
+
"loss": 0.0,
|
21885 |
+
"step": 3124
|
21886 |
+
},
|
21887 |
+
{
|
21888 |
+
"epoch": 1.8176530463865057,
|
21889 |
+
"grad_norm": NaN,
|
21890 |
+
"learning_rate": 2.1538370598437773e-06,
|
21891 |
+
"loss": 0.0,
|
21892 |
+
"step": 3125
|
21893 |
+
},
|
21894 |
+
{
|
21895 |
+
"epoch": 1.8182346953613493,
|
21896 |
+
"grad_norm": NaN,
|
21897 |
+
"learning_rate": 2.1401953848147195e-06,
|
21898 |
+
"loss": 0.0,
|
21899 |
+
"step": 3126
|
21900 |
+
},
|
21901 |
+
{
|
21902 |
+
"epoch": 1.818816344336193,
|
21903 |
+
"grad_norm": NaN,
|
21904 |
+
"learning_rate": 2.1265961031584346e-06,
|
21905 |
+
"loss": 0.0,
|
21906 |
+
"step": 3127
|
21907 |
+
},
|
21908 |
+
{
|
21909 |
+
"epoch": 1.819397993311037,
|
21910 |
+
"grad_norm": NaN,
|
21911 |
+
"learning_rate": 2.1130392269209155e-06,
|
21912 |
+
"loss": 0.0,
|
21913 |
+
"step": 3128
|
21914 |
+
},
|
21915 |
+
{
|
21916 |
+
"epoch": 1.8199796422858805,
|
21917 |
+
"grad_norm": NaN,
|
21918 |
+
"learning_rate": 2.099524768110622e-06,
|
21919 |
+
"loss": 0.0,
|
21920 |
+
"step": 3129
|
21921 |
+
},
|
21922 |
+
{
|
21923 |
+
"epoch": 1.820561291260724,
|
21924 |
+
"grad_norm": NaN,
|
21925 |
+
"learning_rate": 2.086052738698413e-06,
|
21926 |
+
"loss": 0.0,
|
21927 |
+
"step": 3130
|
21928 |
+
},
|
21929 |
+
{
|
21930 |
+
"epoch": 1.8211429402355679,
|
21931 |
+
"grad_norm": NaN,
|
21932 |
+
"learning_rate": 2.072623150617581e-06,
|
21933 |
+
"loss": 0.0,
|
21934 |
+
"step": 3131
|
21935 |
+
},
|
21936 |
+
{
|
21937 |
+
"epoch": 1.8217245892104115,
|
21938 |
+
"grad_norm": NaN,
|
21939 |
+
"learning_rate": 2.0592360157638213e-06,
|
21940 |
+
"loss": 0.0,
|
21941 |
+
"step": 3132
|
21942 |
+
},
|
21943 |
+
{
|
21944 |
+
"epoch": 1.8223062381852553,
|
21945 |
+
"grad_norm": NaN,
|
21946 |
+
"learning_rate": 2.04589134599521e-06,
|
21947 |
+
"loss": 0.0,
|
21948 |
+
"step": 3133
|
21949 |
+
},
|
21950 |
+
{
|
21951 |
+
"epoch": 1.8228878871600989,
|
21952 |
+
"grad_norm": NaN,
|
21953 |
+
"learning_rate": 2.0325891531322305e-06,
|
21954 |
+
"loss": 0.0,
|
21955 |
+
"step": 3134
|
21956 |
+
},
|
21957 |
+
{
|
21958 |
+
"epoch": 1.8234695361349424,
|
21959 |
+
"grad_norm": NaN,
|
21960 |
+
"learning_rate": 2.0193294489577295e-06,
|
21961 |
+
"loss": 0.0,
|
21962 |
+
"step": 3135
|
21963 |
+
},
|
21964 |
+
{
|
21965 |
+
"epoch": 1.8240511851097863,
|
21966 |
+
"grad_norm": NaN,
|
21967 |
+
"learning_rate": 2.006112245216918e-06,
|
21968 |
+
"loss": 0.0,
|
21969 |
+
"step": 3136
|
21970 |
+
},
|
21971 |
+
{
|
21972 |
+
"epoch": 1.82463283408463,
|
21973 |
+
"grad_norm": NaN,
|
21974 |
+
"learning_rate": 1.9929375536173576e-06,
|
21975 |
+
"loss": 0.0,
|
21976 |
+
"step": 3137
|
21977 |
+
},
|
21978 |
+
{
|
21979 |
+
"epoch": 1.8252144830594736,
|
21980 |
+
"grad_norm": NaN,
|
21981 |
+
"learning_rate": 1.9798053858289645e-06,
|
21982 |
+
"loss": 0.0,
|
21983 |
+
"step": 3138
|
21984 |
+
},
|
21985 |
+
{
|
21986 |
+
"epoch": 1.8257961320343172,
|
21987 |
+
"grad_norm": NaN,
|
21988 |
+
"learning_rate": 1.9667157534839887e-06,
|
21989 |
+
"loss": 0.0,
|
21990 |
+
"step": 3139
|
21991 |
+
},
|
21992 |
+
{
|
21993 |
+
"epoch": 1.8263777810091608,
|
21994 |
+
"grad_norm": NaN,
|
21995 |
+
"learning_rate": 1.953668668176983e-06,
|
21996 |
+
"loss": 0.0,
|
21997 |
+
"step": 3140
|
21998 |
+
},
|
21999 |
+
{
|
22000 |
+
"epoch": 1.8269594299840046,
|
22001 |
+
"grad_norm": NaN,
|
22002 |
+
"learning_rate": 1.9406641414648374e-06,
|
22003 |
+
"loss": 0.0,
|
22004 |
+
"step": 3141
|
22005 |
+
},
|
22006 |
+
{
|
22007 |
+
"epoch": 1.8275410789588484,
|
22008 |
+
"grad_norm": NaN,
|
22009 |
+
"learning_rate": 1.9277021848667253e-06,
|
22010 |
+
"loss": 0.0,
|
22011 |
+
"step": 3142
|
22012 |
+
},
|
22013 |
+
{
|
22014 |
+
"epoch": 1.828122727933692,
|
22015 |
+
"grad_norm": NaN,
|
22016 |
+
"learning_rate": 1.9147828098641185e-06,
|
22017 |
+
"loss": 0.0,
|
22018 |
+
"step": 3143
|
22019 |
+
},
|
22020 |
+
{
|
22021 |
+
"epoch": 1.8287043769085356,
|
22022 |
+
"grad_norm": NaN,
|
22023 |
+
"learning_rate": 1.9019060279007794e-06,
|
22024 |
+
"loss": 0.0,
|
22025 |
+
"step": 3144
|
22026 |
+
},
|
22027 |
+
{
|
22028 |
+
"epoch": 1.8292860258833794,
|
22029 |
+
"grad_norm": NaN,
|
22030 |
+
"learning_rate": 1.8890718503827287e-06,
|
22031 |
+
"loss": 0.0,
|
22032 |
+
"step": 3145
|
22033 |
+
},
|
22034 |
+
{
|
22035 |
+
"epoch": 1.8298676748582232,
|
22036 |
+
"grad_norm": NaN,
|
22037 |
+
"learning_rate": 1.8762802886782504e-06,
|
22038 |
+
"loss": 0.0,
|
22039 |
+
"step": 3146
|
22040 |
+
},
|
22041 |
+
{
|
22042 |
+
"epoch": 1.8304493238330668,
|
22043 |
+
"grad_norm": NaN,
|
22044 |
+
"learning_rate": 1.8635313541178977e-06,
|
22045 |
+
"loss": 0.0,
|
22046 |
+
"step": 3147
|
22047 |
+
},
|
22048 |
+
{
|
22049 |
+
"epoch": 1.8310309728079104,
|
22050 |
+
"grad_norm": NaN,
|
22051 |
+
"learning_rate": 1.8508250579944485e-06,
|
22052 |
+
"loss": 0.0,
|
22053 |
+
"step": 3148
|
22054 |
+
},
|
22055 |
+
{
|
22056 |
+
"epoch": 1.831612621782754,
|
22057 |
+
"grad_norm": NaN,
|
22058 |
+
"learning_rate": 1.838161411562911e-06,
|
22059 |
+
"loss": 0.0,
|
22060 |
+
"step": 3149
|
22061 |
+
},
|
22062 |
+
{
|
22063 |
+
"epoch": 1.8321942707575978,
|
22064 |
+
"grad_norm": NaN,
|
22065 |
+
"learning_rate": 1.8255404260405284e-06,
|
22066 |
+
"loss": 0.0,
|
22067 |
+
"step": 3150
|
22068 |
+
},
|
22069 |
+
{
|
22070 |
+
"epoch": 1.8327759197324416,
|
22071 |
+
"grad_norm": NaN,
|
22072 |
+
"learning_rate": 1.8129621126067365e-06,
|
22073 |
+
"loss": 0.0,
|
22074 |
+
"step": 3151
|
22075 |
+
},
|
22076 |
+
{
|
22077 |
+
"epoch": 1.8333575687072852,
|
22078 |
+
"grad_norm": NaN,
|
22079 |
+
"learning_rate": 1.8004264824031948e-06,
|
22080 |
+
"loss": 0.0,
|
22081 |
+
"step": 3152
|
22082 |
+
},
|
22083 |
+
{
|
22084 |
+
"epoch": 1.8339392176821288,
|
22085 |
+
"grad_norm": NaN,
|
22086 |
+
"learning_rate": 1.7879335465337377e-06,
|
22087 |
+
"loss": 0.0,
|
22088 |
+
"step": 3153
|
22089 |
+
},
|
22090 |
+
{
|
22091 |
+
"epoch": 1.8345208666569726,
|
22092 |
+
"grad_norm": NaN,
|
22093 |
+
"learning_rate": 1.7754833160643858e-06,
|
22094 |
+
"loss": 0.0,
|
22095 |
+
"step": 3154
|
22096 |
+
},
|
22097 |
+
{
|
22098 |
+
"epoch": 1.8351025156318161,
|
22099 |
+
"grad_norm": NaN,
|
22100 |
+
"learning_rate": 1.7630758020233396e-06,
|
22101 |
+
"loss": 0.0,
|
22102 |
+
"step": 3155
|
22103 |
+
},
|
22104 |
+
{
|
22105 |
+
"epoch": 1.83568416460666,
|
22106 |
+
"grad_norm": NaN,
|
22107 |
+
"learning_rate": 1.750711015400952e-06,
|
22108 |
+
"loss": 0.0,
|
22109 |
+
"step": 3156
|
22110 |
+
},
|
22111 |
+
{
|
22112 |
+
"epoch": 1.8362658135815035,
|
22113 |
+
"grad_norm": NaN,
|
22114 |
+
"learning_rate": 1.73838896714974e-06,
|
22115 |
+
"loss": 0.0,
|
22116 |
+
"step": 3157
|
22117 |
+
},
|
22118 |
+
{
|
22119 |
+
"epoch": 1.8368474625563471,
|
22120 |
+
"grad_norm": NaN,
|
22121 |
+
"learning_rate": 1.7261096681843559e-06,
|
22122 |
+
"loss": 0.0,
|
22123 |
+
"step": 3158
|
22124 |
+
},
|
22125 |
+
{
|
22126 |
+
"epoch": 1.837429111531191,
|
22127 |
+
"grad_norm": NaN,
|
22128 |
+
"learning_rate": 1.7138731293815825e-06,
|
22129 |
+
"loss": 0.0,
|
22130 |
+
"step": 3159
|
22131 |
+
},
|
22132 |
+
{
|
22133 |
+
"epoch": 1.8380107605060347,
|
22134 |
+
"grad_norm": NaN,
|
22135 |
+
"learning_rate": 1.701679361580333e-06,
|
22136 |
+
"loss": 0.0,
|
22137 |
+
"step": 3160
|
22138 |
+
},
|
22139 |
+
{
|
22140 |
+
"epoch": 1.8385924094808783,
|
22141 |
+
"grad_norm": NaN,
|
22142 |
+
"learning_rate": 1.6895283755816283e-06,
|
22143 |
+
"loss": 0.0,
|
22144 |
+
"step": 3161
|
22145 |
+
},
|
22146 |
+
{
|
22147 |
+
"epoch": 1.839174058455722,
|
22148 |
+
"grad_norm": NaN,
|
22149 |
+
"learning_rate": 1.6774201821486034e-06,
|
22150 |
+
"loss": 0.0,
|
22151 |
+
"step": 3162
|
22152 |
+
},
|
22153 |
+
{
|
22154 |
+
"epoch": 1.8397557074305655,
|
22155 |
+
"grad_norm": NaN,
|
22156 |
+
"learning_rate": 1.665354792006485e-06,
|
22157 |
+
"loss": 0.0,
|
22158 |
+
"step": 3163
|
22159 |
+
},
|
22160 |
+
{
|
22161 |
+
"epoch": 1.8403373564054093,
|
22162 |
+
"grad_norm": NaN,
|
22163 |
+
"learning_rate": 1.6533322158425735e-06,
|
22164 |
+
"loss": 0.0,
|
22165 |
+
"step": 3164
|
22166 |
+
},
|
22167 |
+
{
|
22168 |
+
"epoch": 1.840919005380253,
|
22169 |
+
"grad_norm": NaN,
|
22170 |
+
"learning_rate": 1.6413524643062562e-06,
|
22171 |
+
"loss": 0.0,
|
22172 |
+
"step": 3165
|
22173 |
+
},
|
22174 |
+
{
|
22175 |
+
"epoch": 1.8415006543550967,
|
22176 |
+
"grad_norm": NaN,
|
22177 |
+
"learning_rate": 1.629415548009e-06,
|
22178 |
+
"loss": 0.0,
|
22179 |
+
"step": 3166
|
22180 |
+
},
|
22181 |
+
{
|
22182 |
+
"epoch": 1.8420823033299403,
|
22183 |
+
"grad_norm": NaN,
|
22184 |
+
"learning_rate": 1.617521477524303e-06,
|
22185 |
+
"loss": 0.0,
|
22186 |
+
"step": 3167
|
22187 |
+
},
|
22188 |
+
{
|
22189 |
+
"epoch": 1.842663952304784,
|
22190 |
+
"grad_norm": NaN,
|
22191 |
+
"learning_rate": 1.6056702633877318e-06,
|
22192 |
+
"loss": 0.0,
|
22193 |
+
"step": 3168
|
22194 |
+
},
|
22195 |
+
{
|
22196 |
+
"epoch": 1.8432456012796279,
|
22197 |
+
"grad_norm": NaN,
|
22198 |
+
"learning_rate": 1.5938619160968726e-06,
|
22199 |
+
"loss": 0.0,
|
22200 |
+
"step": 3169
|
22201 |
+
},
|
22202 |
+
{
|
22203 |
+
"epoch": 1.8438272502544715,
|
22204 |
+
"grad_norm": NaN,
|
22205 |
+
"learning_rate": 1.5820964461113642e-06,
|
22206 |
+
"loss": 0.0,
|
22207 |
+
"step": 3170
|
22208 |
+
},
|
22209 |
+
{
|
22210 |
+
"epoch": 1.844408899229315,
|
22211 |
+
"grad_norm": NaN,
|
22212 |
+
"learning_rate": 1.570373863852842e-06,
|
22213 |
+
"loss": 0.0,
|
22214 |
+
"step": 3171
|
22215 |
+
},
|
22216 |
+
{
|
22217 |
+
"epoch": 1.8449905482041586,
|
22218 |
+
"grad_norm": NaN,
|
22219 |
+
"learning_rate": 1.558694179704967e-06,
|
22220 |
+
"loss": 0.0,
|
22221 |
+
"step": 3172
|
22222 |
+
},
|
22223 |
+
{
|
22224 |
+
"epoch": 1.8455721971790024,
|
22225 |
+
"grad_norm": NaN,
|
22226 |
+
"learning_rate": 1.5470574040134022e-06,
|
22227 |
+
"loss": 0.0,
|
22228 |
+
"step": 3173
|
22229 |
+
},
|
22230 |
+
{
|
22231 |
+
"epoch": 1.8461538461538463,
|
22232 |
+
"grad_norm": NaN,
|
22233 |
+
"learning_rate": 1.535463547085786e-06,
|
22234 |
+
"loss": 0.0,
|
22235 |
+
"step": 3174
|
22236 |
+
},
|
22237 |
+
{
|
22238 |
+
"epoch": 1.8467354951286898,
|
22239 |
+
"grad_norm": NaN,
|
22240 |
+
"learning_rate": 1.523912619191764e-06,
|
22241 |
+
"loss": 0.0,
|
22242 |
+
"step": 3175
|
22243 |
+
},
|
22244 |
+
{
|
22245 |
+
"epoch": 1.8473171441035334,
|
22246 |
+
"grad_norm": NaN,
|
22247 |
+
"learning_rate": 1.512404630562947e-06,
|
22248 |
+
"loss": 0.0,
|
22249 |
+
"step": 3176
|
22250 |
+
},
|
22251 |
+
{
|
22252 |
+
"epoch": 1.8478987930783772,
|
22253 |
+
"grad_norm": NaN,
|
22254 |
+
"learning_rate": 1.5009395913929024e-06,
|
22255 |
+
"loss": 0.0,
|
22256 |
+
"step": 3177
|
22257 |
+
},
|
22258 |
+
{
|
22259 |
+
"epoch": 1.848480442053221,
|
22260 |
+
"grad_norm": NaN,
|
22261 |
+
"learning_rate": 1.4895175118371629e-06,
|
22262 |
+
"loss": 0.0,
|
22263 |
+
"step": 3178
|
22264 |
+
},
|
22265 |
+
{
|
22266 |
+
"epoch": 1.8490620910280646,
|
22267 |
+
"grad_norm": NaN,
|
22268 |
+
"learning_rate": 1.4781384020132072e-06,
|
22269 |
+
"loss": 0.0,
|
22270 |
+
"step": 3179
|
22271 |
+
},
|
22272 |
+
{
|
22273 |
+
"epoch": 1.8496437400029082,
|
22274 |
+
"grad_norm": NaN,
|
22275 |
+
"learning_rate": 1.4668022720004448e-06,
|
22276 |
+
"loss": 0.0,
|
22277 |
+
"step": 3180
|
22278 |
+
},
|
22279 |
+
{
|
22280 |
+
"epoch": 1.8502253889777518,
|
22281 |
+
"grad_norm": NaN,
|
22282 |
+
"learning_rate": 1.4555091318402269e-06,
|
22283 |
+
"loss": 0.0,
|
22284 |
+
"step": 3181
|
22285 |
+
},
|
22286 |
+
{
|
22287 |
+
"epoch": 1.8508070379525956,
|
22288 |
+
"grad_norm": NaN,
|
22289 |
+
"learning_rate": 1.4442589915358128e-06,
|
22290 |
+
"loss": 0.0,
|
22291 |
+
"step": 3182
|
22292 |
+
},
|
22293 |
+
{
|
22294 |
+
"epoch": 1.8513886869274394,
|
22295 |
+
"grad_norm": NaN,
|
22296 |
+
"learning_rate": 1.4330518610523812e-06,
|
22297 |
+
"loss": 0.0,
|
22298 |
+
"step": 3183
|
22299 |
+
},
|
22300 |
+
{
|
22301 |
+
"epoch": 1.851970335902283,
|
22302 |
+
"grad_norm": NaN,
|
22303 |
+
"learning_rate": 1.4218877503170024e-06,
|
22304 |
+
"loss": 0.0,
|
22305 |
+
"step": 3184
|
22306 |
+
},
|
22307 |
+
{
|
22308 |
+
"epoch": 1.8525519848771266,
|
22309 |
+
"grad_norm": NaN,
|
22310 |
+
"learning_rate": 1.410766669218666e-06,
|
22311 |
+
"loss": 0.0,
|
22312 |
+
"step": 3185
|
22313 |
+
},
|
22314 |
+
{
|
22315 |
+
"epoch": 1.8531336338519704,
|
22316 |
+
"grad_norm": NaN,
|
22317 |
+
"learning_rate": 1.3996886276082254e-06,
|
22318 |
+
"loss": 0.0,
|
22319 |
+
"step": 3186
|
22320 |
+
},
|
22321 |
+
{
|
22322 |
+
"epoch": 1.853715282826814,
|
22323 |
+
"grad_norm": NaN,
|
22324 |
+
"learning_rate": 1.3886536352984036e-06,
|
22325 |
+
"loss": 0.0,
|
22326 |
+
"step": 3187
|
22327 |
+
},
|
22328 |
+
{
|
22329 |
+
"epoch": 1.8542969318016578,
|
22330 |
+
"grad_norm": NaN,
|
22331 |
+
"learning_rate": 1.377661702063815e-06,
|
22332 |
+
"loss": 0.0,
|
22333 |
+
"step": 3188
|
22334 |
+
},
|
22335 |
+
{
|
22336 |
+
"epoch": 1.8548785807765014,
|
22337 |
+
"grad_norm": NaN,
|
22338 |
+
"learning_rate": 1.3667128376409156e-06,
|
22339 |
+
"loss": 0.0,
|
22340 |
+
"step": 3189
|
22341 |
+
},
|
22342 |
+
{
|
22343 |
+
"epoch": 1.855460229751345,
|
22344 |
+
"grad_norm": NaN,
|
22345 |
+
"learning_rate": 1.355807051728014e-06,
|
22346 |
+
"loss": 0.0,
|
22347 |
+
"step": 3190
|
22348 |
+
},
|
22349 |
+
{
|
22350 |
+
"epoch": 1.8560418787261888,
|
22351 |
+
"grad_norm": NaN,
|
22352 |
+
"learning_rate": 1.3449443539852724e-06,
|
22353 |
+
"loss": 0.0,
|
22354 |
+
"step": 3191
|
22355 |
+
},
|
22356 |
+
{
|
22357 |
+
"epoch": 1.8566235277010326,
|
22358 |
+
"grad_norm": NaN,
|
22359 |
+
"learning_rate": 1.3341247540346657e-06,
|
22360 |
+
"loss": 0.0,
|
22361 |
+
"step": 3192
|
22362 |
+
},
|
22363 |
+
{
|
22364 |
+
"epoch": 1.8572051766758761,
|
22365 |
+
"grad_norm": NaN,
|
22366 |
+
"learning_rate": 1.3233482614600113e-06,
|
22367 |
+
"loss": 0.0,
|
22368 |
+
"step": 3193
|
22369 |
+
},
|
22370 |
+
{
|
22371 |
+
"epoch": 1.8577868256507197,
|
22372 |
+
"grad_norm": NaN,
|
22373 |
+
"learning_rate": 1.3126148858069342e-06,
|
22374 |
+
"loss": 0.0,
|
22375 |
+
"step": 3194
|
22376 |
+
},
|
22377 |
+
{
|
22378 |
+
"epoch": 1.8583684746255633,
|
22379 |
+
"grad_norm": NaN,
|
22380 |
+
"learning_rate": 1.3019246365828796e-06,
|
22381 |
+
"loss": 0.0,
|
22382 |
+
"step": 3195
|
22383 |
+
},
|
22384 |
+
{
|
22385 |
+
"epoch": 1.8589501236004071,
|
22386 |
+
"grad_norm": NaN,
|
22387 |
+
"learning_rate": 1.2912775232570672e-06,
|
22388 |
+
"loss": 0.0,
|
22389 |
+
"step": 3196
|
22390 |
+
},
|
22391 |
+
{
|
22392 |
+
"epoch": 1.859531772575251,
|
22393 |
+
"grad_norm": NaN,
|
22394 |
+
"learning_rate": 1.2806735552605365e-06,
|
22395 |
+
"loss": 0.0,
|
22396 |
+
"step": 3197
|
22397 |
+
},
|
22398 |
+
{
|
22399 |
+
"epoch": 1.8601134215500945,
|
22400 |
+
"grad_norm": NaN,
|
22401 |
+
"learning_rate": 1.2701127419860847e-06,
|
22402 |
+
"loss": 0.0,
|
22403 |
+
"step": 3198
|
22404 |
+
},
|
22405 |
+
{
|
22406 |
+
"epoch": 1.860695070524938,
|
22407 |
+
"grad_norm": NaN,
|
22408 |
+
"learning_rate": 1.2595950927883015e-06,
|
22409 |
+
"loss": 0.0,
|
22410 |
+
"step": 3199
|
22411 |
+
},
|
22412 |
+
{
|
22413 |
+
"epoch": 1.861276719499782,
|
22414 |
+
"grad_norm": NaN,
|
22415 |
+
"learning_rate": 1.249120616983529e-06,
|
22416 |
+
"loss": 0.0,
|
22417 |
+
"step": 3200
|
22418 |
+
},
|
22419 |
+
{
|
22420 |
+
"epoch": 1.8618583684746257,
|
22421 |
+
"grad_norm": NaN,
|
22422 |
+
"learning_rate": 1.2386893238498788e-06,
|
22423 |
+
"loss": 0.0,
|
22424 |
+
"step": 3201
|
22425 |
+
},
|
22426 |
+
{
|
22427 |
+
"epoch": 1.8624400174494693,
|
22428 |
+
"grad_norm": NaN,
|
22429 |
+
"learning_rate": 1.228301222627204e-06,
|
22430 |
+
"loss": 0.0,
|
22431 |
+
"step": 3202
|
22432 |
+
},
|
22433 |
+
{
|
22434 |
+
"epoch": 1.8630216664243129,
|
22435 |
+
"grad_norm": NaN,
|
22436 |
+
"learning_rate": 1.2179563225170998e-06,
|
22437 |
+
"loss": 0.0,
|
22438 |
+
"step": 3203
|
22439 |
+
},
|
22440 |
+
{
|
22441 |
+
"epoch": 1.8636033153991565,
|
22442 |
+
"grad_norm": NaN,
|
22443 |
+
"learning_rate": 1.2076546326829085e-06,
|
22444 |
+
"loss": 0.0,
|
22445 |
+
"step": 3204
|
22446 |
+
},
|
22447 |
+
{
|
22448 |
+
"epoch": 1.8641849643740003,
|
22449 |
+
"grad_norm": NaN,
|
22450 |
+
"learning_rate": 1.1973961622496755e-06,
|
22451 |
+
"loss": 0.0,
|
22452 |
+
"step": 3205
|
22453 |
+
},
|
22454 |
+
{
|
22455 |
+
"epoch": 1.864766613348844,
|
22456 |
+
"grad_norm": NaN,
|
22457 |
+
"learning_rate": 1.1871809203041707e-06,
|
22458 |
+
"loss": 0.0,
|
22459 |
+
"step": 3206
|
22460 |
+
},
|
22461 |
+
{
|
22462 |
+
"epoch": 1.8653482623236877,
|
22463 |
+
"grad_norm": NaN,
|
22464 |
+
"learning_rate": 1.177008915894884e-06,
|
22465 |
+
"loss": 0.0,
|
22466 |
+
"step": 3207
|
22467 |
+
},
|
22468 |
+
{
|
22469 |
+
"epoch": 1.8659299112985313,
|
22470 |
+
"grad_norm": NaN,
|
22471 |
+
"learning_rate": 1.166880158031991e-06,
|
22472 |
+
"loss": 0.0,
|
22473 |
+
"step": 3208
|
22474 |
+
},
|
22475 |
+
{
|
22476 |
+
"epoch": 1.866511560273375,
|
22477 |
+
"grad_norm": NaN,
|
22478 |
+
"learning_rate": 1.156794655687371e-06,
|
22479 |
+
"loss": 0.0,
|
22480 |
+
"step": 3209
|
22481 |
+
},
|
22482 |
+
{
|
22483 |
+
"epoch": 1.8670932092482189,
|
22484 |
+
"grad_norm": NaN,
|
22485 |
+
"learning_rate": 1.1467524177945832e-06,
|
22486 |
+
"loss": 0.0,
|
22487 |
+
"step": 3210
|
22488 |
+
},
|
22489 |
+
{
|
22490 |
+
"epoch": 1.8676748582230625,
|
22491 |
+
"grad_norm": NaN,
|
22492 |
+
"learning_rate": 1.1367534532488677e-06,
|
22493 |
+
"loss": 0.0,
|
22494 |
+
"step": 3211
|
22495 |
+
},
|
22496 |
+
{
|
22497 |
+
"epoch": 1.868256507197906,
|
22498 |
+
"grad_norm": NaN,
|
22499 |
+
"learning_rate": 1.1267977709071288e-06,
|
22500 |
+
"loss": 0.0,
|
22501 |
+
"step": 3212
|
22502 |
+
},
|
22503 |
+
{
|
22504 |
+
"epoch": 1.8688381561727496,
|
22505 |
+
"grad_norm": NaN,
|
22506 |
+
"learning_rate": 1.116885379587934e-06,
|
22507 |
+
"loss": 0.0,
|
22508 |
+
"step": 3213
|
22509 |
+
},
|
22510 |
+
{
|
22511 |
+
"epoch": 1.8694198051475934,
|
22512 |
+
"grad_norm": NaN,
|
22513 |
+
"learning_rate": 1.1070162880715052e-06,
|
22514 |
+
"loss": 0.0,
|
22515 |
+
"step": 3214
|
22516 |
+
},
|
22517 |
+
{
|
22518 |
+
"epoch": 1.8700014541224372,
|
22519 |
+
"grad_norm": NaN,
|
22520 |
+
"learning_rate": 1.0971905050997155e-06,
|
22521 |
+
"loss": 0.0,
|
22522 |
+
"step": 3215
|
22523 |
+
},
|
22524 |
+
{
|
22525 |
+
"epoch": 1.8705831030972808,
|
22526 |
+
"grad_norm": NaN,
|
22527 |
+
"learning_rate": 1.0874080393760644e-06,
|
22528 |
+
"loss": 0.0,
|
22529 |
+
"step": 3216
|
22530 |
+
},
|
22531 |
+
{
|
22532 |
+
"epoch": 1.8711647520721244,
|
22533 |
+
"grad_norm": NaN,
|
22534 |
+
"learning_rate": 1.077668899565687e-06,
|
22535 |
+
"loss": 0.0,
|
22536 |
+
"step": 3217
|
22537 |
+
},
|
22538 |
+
{
|
22539 |
+
"epoch": 1.871746401046968,
|
22540 |
+
"grad_norm": NaN,
|
22541 |
+
"learning_rate": 1.0679730942953492e-06,
|
22542 |
+
"loss": 0.0,
|
22543 |
+
"step": 3218
|
22544 |
+
},
|
22545 |
+
{
|
22546 |
+
"epoch": 1.8723280500218118,
|
22547 |
+
"grad_norm": NaN,
|
22548 |
+
"learning_rate": 1.05832063215342e-06,
|
22549 |
+
"loss": 0.0,
|
22550 |
+
"step": 3219
|
22551 |
+
},
|
22552 |
+
{
|
22553 |
+
"epoch": 1.8729096989966556,
|
22554 |
+
"grad_norm": NaN,
|
22555 |
+
"learning_rate": 1.0487115216898768e-06,
|
22556 |
+
"loss": 0.0,
|
22557 |
+
"step": 3220
|
22558 |
+
},
|
22559 |
+
{
|
22560 |
+
"epoch": 1.8734913479714992,
|
22561 |
+
"grad_norm": NaN,
|
22562 |
+
"learning_rate": 1.0391457714163055e-06,
|
22563 |
+
"loss": 0.0,
|
22564 |
+
"step": 3221
|
22565 |
+
},
|
22566 |
+
{
|
22567 |
+
"epoch": 1.8740729969463428,
|
22568 |
+
"grad_norm": NaN,
|
22569 |
+
"learning_rate": 1.0296233898058783e-06,
|
22570 |
+
"loss": 0.0,
|
22571 |
+
"step": 3222
|
22572 |
+
},
|
22573 |
+
{
|
22574 |
+
"epoch": 1.8746546459211866,
|
22575 |
+
"grad_norm": NaN,
|
22576 |
+
"learning_rate": 1.0201443852933535e-06,
|
22577 |
+
"loss": 0.0,
|
22578 |
+
"step": 3223
|
22579 |
+
},
|
22580 |
+
{
|
22581 |
+
"epoch": 1.8752362948960304,
|
22582 |
+
"grad_norm": NaN,
|
22583 |
+
"learning_rate": 1.0107087662750703e-06,
|
22584 |
+
"loss": 0.0,
|
22585 |
+
"step": 3224
|
22586 |
+
},
|
22587 |
+
{
|
22588 |
+
"epoch": 1.875817943870874,
|
22589 |
+
"grad_norm": NaN,
|
22590 |
+
"learning_rate": 1.0013165411089266e-06,
|
22591 |
+
"loss": 0.0,
|
22592 |
+
"step": 3225
|
22593 |
+
},
|
22594 |
+
{
|
22595 |
+
"epoch": 1.8763995928457176,
|
22596 |
+
"grad_norm": NaN,
|
22597 |
+
"learning_rate": 9.91967718114395e-07,
|
22598 |
+
"loss": 0.0,
|
22599 |
+
"step": 3226
|
22600 |
+
},
|
22601 |
+
{
|
22602 |
+
"epoch": 1.8769812418205611,
|
22603 |
+
"grad_norm": NaN,
|
22604 |
+
"learning_rate": 9.826623055724959e-07,
|
22605 |
+
"loss": 0.0,
|
22606 |
+
"step": 3227
|
22607 |
+
},
|
22608 |
+
{
|
22609 |
+
"epoch": 1.877562890795405,
|
22610 |
+
"grad_norm": NaN,
|
22611 |
+
"learning_rate": 9.734003117257974e-07,
|
22612 |
+
"loss": 0.0,
|
22613 |
+
"step": 3228
|
22614 |
+
},
|
22615 |
+
{
|
22616 |
+
"epoch": 1.8781445397702488,
|
22617 |
+
"grad_norm": NaN,
|
22618 |
+
"learning_rate": 9.641817447784195e-07,
|
22619 |
+
"loss": 0.0,
|
22620 |
+
"step": 3229
|
22621 |
+
},
|
22622 |
+
{
|
22623 |
+
"epoch": 1.8787261887450923,
|
22624 |
+
"grad_norm": NaN,
|
22625 |
+
"learning_rate": 9.550066128959978e-07,
|
22626 |
+
"loss": 0.0,
|
22627 |
+
"step": 3230
|
22628 |
+
},
|
22629 |
+
{
|
22630 |
+
"epoch": 1.879307837719936,
|
22631 |
+
"grad_norm": NaN,
|
22632 |
+
"learning_rate": 9.458749242057085e-07,
|
22633 |
+
"loss": 0.0,
|
22634 |
+
"step": 3231
|
22635 |
+
},
|
22636 |
+
{
|
22637 |
+
"epoch": 1.8798894866947797,
|
22638 |
+
"grad_norm": NaN,
|
22639 |
+
"learning_rate": 9.367866867962427e-07,
|
22640 |
+
"loss": 0.0,
|
22641 |
+
"step": 3232
|
22642 |
+
},
|
22643 |
+
{
|
22644 |
+
"epoch": 1.8804711356696235,
|
22645 |
+
"grad_norm": NaN,
|
22646 |
+
"learning_rate": 9.277419087177885e-07,
|
22647 |
+
"loss": 0.0,
|
22648 |
+
"step": 3233
|
22649 |
+
},
|
22650 |
+
{
|
22651 |
+
"epoch": 1.8810527846444671,
|
22652 |
+
"grad_norm": NaN,
|
22653 |
+
"learning_rate": 9.187405979820651e-07,
|
22654 |
+
"loss": 0.0,
|
22655 |
+
"step": 3234
|
22656 |
+
},
|
22657 |
+
{
|
22658 |
+
"epoch": 1.8816344336193107,
|
22659 |
+
"grad_norm": NaN,
|
22660 |
+
"learning_rate": 9.097827625622723e-07,
|
22661 |
+
"loss": 0.0,
|
22662 |
+
"step": 3235
|
22663 |
+
},
|
22664 |
+
{
|
22665 |
+
"epoch": 1.8822160825941543,
|
22666 |
+
"grad_norm": NaN,
|
22667 |
+
"learning_rate": 9.008684103930964e-07,
|
22668 |
+
"loss": 0.0,
|
22669 |
+
"step": 3236
|
22670 |
+
},
|
22671 |
+
{
|
22672 |
+
"epoch": 1.882797731568998,
|
22673 |
+
"grad_norm": NaN,
|
22674 |
+
"learning_rate": 8.919975493707211e-07,
|
22675 |
+
"loss": 0.0,
|
22676 |
+
"step": 3237
|
22677 |
+
},
|
22678 |
+
{
|
22679 |
+
"epoch": 1.883379380543842,
|
22680 |
+
"grad_norm": NaN,
|
22681 |
+
"learning_rate": 8.831701873527942e-07,
|
22682 |
+
"loss": 0.0,
|
22683 |
+
"step": 3238
|
22684 |
+
},
|
22685 |
+
{
|
22686 |
+
"epoch": 1.8839610295186855,
|
22687 |
+
"grad_norm": NaN,
|
22688 |
+
"learning_rate": 8.743863321584334e-07,
|
22689 |
+
"loss": 0.0,
|
22690 |
+
"step": 3239
|
22691 |
+
},
|
22692 |
+
{
|
22693 |
+
"epoch": 1.884542678493529,
|
22694 |
+
"grad_norm": NaN,
|
22695 |
+
"learning_rate": 8.656459915682369e-07,
|
22696 |
+
"loss": 0.0,
|
22697 |
+
"step": 3240
|
22698 |
+
},
|
22699 |
+
{
|
22700 |
+
"epoch": 1.8851243274683729,
|
22701 |
+
"grad_norm": NaN,
|
22702 |
+
"learning_rate": 8.569491733242341e-07,
|
22703 |
+
"loss": 0.0,
|
22704 |
+
"step": 3241
|
22705 |
+
},
|
22706 |
+
{
|
22707 |
+
"epoch": 1.8857059764432165,
|
22708 |
+
"grad_norm": NaN,
|
22709 |
+
"learning_rate": 8.482958851299127e-07,
|
22710 |
+
"loss": 0.0,
|
22711 |
+
"step": 3242
|
22712 |
+
},
|
22713 |
+
{
|
22714 |
+
"epoch": 1.8862876254180603,
|
22715 |
+
"grad_norm": NaN,
|
22716 |
+
"learning_rate": 8.396861346502138e-07,
|
22717 |
+
"loss": 0.0,
|
22718 |
+
"step": 3243
|
22719 |
+
},
|
22720 |
+
{
|
22721 |
+
"epoch": 1.8868692743929039,
|
22722 |
+
"grad_norm": NaN,
|
22723 |
+
"learning_rate": 8.311199295114924e-07,
|
22724 |
+
"loss": 0.0,
|
22725 |
+
"step": 3244
|
22726 |
+
},
|
22727 |
+
{
|
22728 |
+
"epoch": 1.8874509233677474,
|
22729 |
+
"grad_norm": NaN,
|
22730 |
+
"learning_rate": 8.225972773015456e-07,
|
22731 |
+
"loss": 0.0,
|
22732 |
+
"step": 3245
|
22733 |
+
},
|
22734 |
+
{
|
22735 |
+
"epoch": 1.8880325723425913,
|
22736 |
+
"grad_norm": NaN,
|
22737 |
+
"learning_rate": 8.141181855695845e-07,
|
22738 |
+
"loss": 0.0,
|
22739 |
+
"step": 3246
|
22740 |
+
},
|
22741 |
+
{
|
22742 |
+
"epoch": 1.888614221317435,
|
22743 |
+
"grad_norm": NaN,
|
22744 |
+
"learning_rate": 8.056826618262459e-07,
|
22745 |
+
"loss": 0.0,
|
22746 |
+
"step": 3247
|
22747 |
+
},
|
22748 |
+
{
|
22749 |
+
"epoch": 1.8891958702922786,
|
22750 |
+
"grad_norm": NaN,
|
22751 |
+
"learning_rate": 7.972907135435637e-07,
|
22752 |
+
"loss": 0.0,
|
22753 |
+
"step": 3248
|
22754 |
+
},
|
22755 |
+
{
|
22756 |
+
"epoch": 1.8897775192671222,
|
22757 |
+
"grad_norm": NaN,
|
22758 |
+
"learning_rate": 7.889423481549862e-07,
|
22759 |
+
"loss": 0.0,
|
22760 |
+
"step": 3249
|
22761 |
+
},
|
22762 |
+
{
|
22763 |
+
"epoch": 1.8903591682419658,
|
22764 |
+
"grad_norm": NaN,
|
22765 |
+
"learning_rate": 7.806375730553372e-07,
|
22766 |
+
"loss": 0.0,
|
22767 |
+
"step": 3250
|
22768 |
+
},
|
22769 |
+
{
|
22770 |
+
"epoch": 1.8909408172168096,
|
22771 |
+
"grad_norm": NaN,
|
22772 |
+
"learning_rate": 7.72376395600849e-07,
|
22773 |
+
"loss": 0.0,
|
22774 |
+
"step": 3251
|
22775 |
+
},
|
22776 |
+
{
|
22777 |
+
"epoch": 1.8915224661916534,
|
22778 |
+
"grad_norm": NaN,
|
22779 |
+
"learning_rate": 7.641588231091234e-07,
|
22780 |
+
"loss": 0.0,
|
22781 |
+
"step": 3252
|
22782 |
+
},
|
22783 |
+
{
|
22784 |
+
"epoch": 1.892104115166497,
|
22785 |
+
"grad_norm": NaN,
|
22786 |
+
"learning_rate": 7.559848628591437e-07,
|
22787 |
+
"loss": 0.0,
|
22788 |
+
"step": 3253
|
22789 |
+
},
|
22790 |
+
{
|
22791 |
+
"epoch": 1.8926857641413406,
|
22792 |
+
"grad_norm": NaN,
|
22793 |
+
"learning_rate": 7.47854522091257e-07,
|
22794 |
+
"loss": 0.0,
|
22795 |
+
"step": 3254
|
22796 |
+
},
|
22797 |
+
{
|
22798 |
+
"epoch": 1.8932674131161844,
|
22799 |
+
"grad_norm": NaN,
|
22800 |
+
"learning_rate": 7.397678080071746e-07,
|
22801 |
+
"loss": 0.0,
|
22802 |
+
"step": 3255
|
22803 |
+
},
|
22804 |
+
{
|
22805 |
+
"epoch": 1.8938490620910282,
|
22806 |
+
"grad_norm": NaN,
|
22807 |
+
"learning_rate": 7.317247277699724e-07,
|
22808 |
+
"loss": 0.0,
|
22809 |
+
"step": 3256
|
22810 |
+
},
|
22811 |
+
{
|
22812 |
+
"epoch": 1.8944307110658718,
|
22813 |
+
"grad_norm": NaN,
|
22814 |
+
"learning_rate": 7.237252885040624e-07,
|
22815 |
+
"loss": 0.0,
|
22816 |
+
"step": 3257
|
22817 |
+
},
|
22818 |
+
{
|
22819 |
+
"epoch": 1.8950123600407154,
|
22820 |
+
"grad_norm": NaN,
|
22821 |
+
"learning_rate": 7.157694972952045e-07,
|
22822 |
+
"loss": 0.0,
|
22823 |
+
"step": 3258
|
22824 |
+
},
|
22825 |
+
{
|
22826 |
+
"epoch": 1.895594009015559,
|
22827 |
+
"grad_norm": NaN,
|
22828 |
+
"learning_rate": 7.078573611905115e-07,
|
22829 |
+
"loss": 0.0,
|
22830 |
+
"step": 3259
|
22831 |
+
},
|
22832 |
+
{
|
22833 |
+
"epoch": 1.8961756579904028,
|
22834 |
+
"grad_norm": NaN,
|
22835 |
+
"learning_rate": 6.999888871984106e-07,
|
22836 |
+
"loss": 0.0,
|
22837 |
+
"step": 3260
|
22838 |
+
},
|
22839 |
+
{
|
22840 |
+
"epoch": 1.8967573069652466,
|
22841 |
+
"grad_norm": NaN,
|
22842 |
+
"learning_rate": 6.921640822886488e-07,
|
22843 |
+
"loss": 0.0,
|
22844 |
+
"step": 3261
|
22845 |
+
},
|
22846 |
+
{
|
22847 |
+
"epoch": 1.8973389559400902,
|
22848 |
+
"grad_norm": NaN,
|
22849 |
+
"learning_rate": 6.843829533923041e-07,
|
22850 |
+
"loss": 0.0,
|
22851 |
+
"step": 3262
|
22852 |
+
},
|
22853 |
+
{
|
22854 |
+
"epoch": 1.8979206049149338,
|
22855 |
+
"grad_norm": NaN,
|
22856 |
+
"learning_rate": 6.766455074017686e-07,
|
22857 |
+
"loss": 0.0,
|
22858 |
+
"step": 3263
|
22859 |
+
},
|
22860 |
+
{
|
22861 |
+
"epoch": 1.8985022538897776,
|
22862 |
+
"grad_norm": NaN,
|
22863 |
+
"learning_rate": 6.689517511707322e-07,
|
22864 |
+
"loss": 0.0,
|
22865 |
+
"step": 3264
|
22866 |
+
},
|
22867 |
+
{
|
22868 |
+
"epoch": 1.8990839028646214,
|
22869 |
+
"grad_norm": NaN,
|
22870 |
+
"learning_rate": 6.613016915141879e-07,
|
22871 |
+
"loss": 0.0,
|
22872 |
+
"step": 3265
|
22873 |
+
},
|
22874 |
+
{
|
22875 |
+
"epoch": 1.899665551839465,
|
22876 |
+
"grad_norm": NaN,
|
22877 |
+
"learning_rate": 6.536953352084263e-07,
|
22878 |
+
"loss": 0.0,
|
22879 |
+
"step": 3266
|
22880 |
+
},
|
22881 |
+
{
|
22882 |
+
"epoch": 1.9002472008143085,
|
22883 |
+
"grad_norm": NaN,
|
22884 |
+
"learning_rate": 6.461326889910191e-07,
|
22885 |
+
"loss": 0.0,
|
22886 |
+
"step": 3267
|
22887 |
+
},
|
22888 |
+
{
|
22889 |
+
"epoch": 1.9008288497891521,
|
22890 |
+
"grad_norm": NaN,
|
22891 |
+
"learning_rate": 6.386137595608299e-07,
|
22892 |
+
"loss": 0.0,
|
22893 |
+
"step": 3268
|
22894 |
+
},
|
22895 |
+
{
|
22896 |
+
"epoch": 1.901410498763996,
|
22897 |
+
"grad_norm": NaN,
|
22898 |
+
"learning_rate": 6.311385535779979e-07,
|
22899 |
+
"loss": 0.0,
|
22900 |
+
"step": 3269
|
22901 |
+
},
|
22902 |
+
{
|
22903 |
+
"epoch": 1.9019921477388397,
|
22904 |
+
"grad_norm": NaN,
|
22905 |
+
"learning_rate": 6.237070776639209e-07,
|
22906 |
+
"loss": 0.0,
|
22907 |
+
"step": 3270
|
22908 |
+
},
|
22909 |
+
{
|
22910 |
+
"epoch": 1.9025737967136833,
|
22911 |
+
"grad_norm": NaN,
|
22912 |
+
"learning_rate": 6.163193384012722e-07,
|
22913 |
+
"loss": 0.0,
|
22914 |
+
"step": 3271
|
22915 |
+
},
|
22916 |
+
{
|
22917 |
+
"epoch": 1.903155445688527,
|
22918 |
+
"grad_norm": NaN,
|
22919 |
+
"learning_rate": 6.089753423339839e-07,
|
22920 |
+
"loss": 0.0,
|
22921 |
+
"step": 3272
|
22922 |
+
},
|
22923 |
+
{
|
22924 |
+
"epoch": 1.9037370946633707,
|
22925 |
+
"grad_norm": NaN,
|
22926 |
+
"learning_rate": 6.016750959672357e-07,
|
22927 |
+
"loss": 0.0,
|
22928 |
+
"step": 3273
|
22929 |
+
},
|
22930 |
+
{
|
22931 |
+
"epoch": 1.9043187436382143,
|
22932 |
+
"grad_norm": NaN,
|
22933 |
+
"learning_rate": 5.944186057674606e-07,
|
22934 |
+
"loss": 0.0,
|
22935 |
+
"step": 3274
|
22936 |
+
},
|
22937 |
+
{
|
22938 |
+
"epoch": 1.904900392613058,
|
22939 |
+
"grad_norm": NaN,
|
22940 |
+
"learning_rate": 5.872058781623224e-07,
|
22941 |
+
"loss": 0.0,
|
22942 |
+
"step": 3275
|
22943 |
+
},
|
22944 |
+
{
|
22945 |
+
"epoch": 1.9054820415879017,
|
22946 |
+
"grad_norm": NaN,
|
22947 |
+
"learning_rate": 5.80036919540733e-07,
|
22948 |
+
"loss": 0.0,
|
22949 |
+
"step": 3276
|
22950 |
+
},
|
22951 |
+
{
|
22952 |
+
"epoch": 1.9060636905627453,
|
22953 |
+
"grad_norm": NaN,
|
22954 |
+
"learning_rate": 5.729117362528346e-07,
|
22955 |
+
"loss": 0.0,
|
22956 |
+
"step": 3277
|
22957 |
+
},
|
22958 |
+
{
|
22959 |
+
"epoch": 1.906645339537589,
|
22960 |
+
"grad_norm": NaN,
|
22961 |
+
"learning_rate": 5.658303346099847e-07,
|
22962 |
+
"loss": 0.0,
|
22963 |
+
"step": 3278
|
22964 |
+
},
|
22965 |
+
{
|
22966 |
+
"epoch": 1.9072269885124329,
|
22967 |
+
"grad_norm": NaN,
|
22968 |
+
"learning_rate": 5.587927208847599e-07,
|
22969 |
+
"loss": 0.0,
|
22970 |
+
"step": 3279
|
22971 |
+
},
|
22972 |
+
{
|
22973 |
+
"epoch": 1.9078086374872765,
|
22974 |
+
"grad_norm": NaN,
|
22975 |
+
"learning_rate": 5.517989013109626e-07,
|
22976 |
+
"loss": 0.0,
|
22977 |
+
"step": 3280
|
22978 |
+
},
|
22979 |
+
{
|
22980 |
+
"epoch": 1.90839028646212,
|
22981 |
+
"grad_norm": NaN,
|
22982 |
+
"learning_rate": 5.448488820835929e-07,
|
22983 |
+
"loss": 0.0,
|
22984 |
+
"step": 3281
|
22985 |
+
},
|
22986 |
+
{
|
22987 |
+
"epoch": 1.9089719354369636,
|
22988 |
+
"grad_norm": NaN,
|
22989 |
+
"learning_rate": 5.379426693588485e-07,
|
22990 |
+
"loss": 0.0,
|
22991 |
+
"step": 3282
|
22992 |
+
},
|
22993 |
+
{
|
22994 |
+
"epoch": 1.9095535844118074,
|
22995 |
+
"grad_norm": NaN,
|
22996 |
+
"learning_rate": 5.310802692541416e-07,
|
22997 |
+
"loss": 0.0,
|
22998 |
+
"step": 3283
|
22999 |
+
},
|
23000 |
+
{
|
23001 |
+
"epoch": 1.9101352333866513,
|
23002 |
+
"grad_norm": NaN,
|
23003 |
+
"learning_rate": 5.242616878480599e-07,
|
23004 |
+
"loss": 0.0,
|
23005 |
+
"step": 3284
|
23006 |
+
},
|
23007 |
+
{
|
23008 |
+
"epoch": 1.9107168823614948,
|
23009 |
+
"grad_norm": NaN,
|
23010 |
+
"learning_rate": 5.174869311803831e-07,
|
23011 |
+
"loss": 0.0,
|
23012 |
+
"step": 3285
|
23013 |
+
},
|
23014 |
+
{
|
23015 |
+
"epoch": 1.9112985313363384,
|
23016 |
+
"grad_norm": NaN,
|
23017 |
+
"learning_rate": 5.107560052520665e-07,
|
23018 |
+
"loss": 0.0,
|
23019 |
+
"step": 3286
|
23020 |
+
},
|
23021 |
+
{
|
23022 |
+
"epoch": 1.9118801803111822,
|
23023 |
+
"grad_norm": NaN,
|
23024 |
+
"learning_rate": 5.040689160252576e-07,
|
23025 |
+
"loss": 0.0,
|
23026 |
+
"step": 3287
|
23027 |
+
},
|
23028 |
+
{
|
23029 |
+
"epoch": 1.912461829286026,
|
23030 |
+
"grad_norm": NaN,
|
23031 |
+
"learning_rate": 4.974256694232515e-07,
|
23032 |
+
"loss": 0.0,
|
23033 |
+
"step": 3288
|
23034 |
+
},
|
23035 |
+
{
|
23036 |
+
"epoch": 1.9130434782608696,
|
23037 |
+
"grad_norm": NaN,
|
23038 |
+
"learning_rate": 4.908262713305301e-07,
|
23039 |
+
"loss": 0.0,
|
23040 |
+
"step": 3289
|
23041 |
+
},
|
23042 |
+
{
|
23043 |
+
"epoch": 1.9136251272357132,
|
23044 |
+
"grad_norm": NaN,
|
23045 |
+
"learning_rate": 4.842707275927116e-07,
|
23046 |
+
"loss": 0.0,
|
23047 |
+
"step": 3290
|
23048 |
+
},
|
23049 |
+
{
|
23050 |
+
"epoch": 1.9142067762105568,
|
23051 |
+
"grad_norm": NaN,
|
23052 |
+
"learning_rate": 4.777590440165902e-07,
|
23053 |
+
"loss": 0.0,
|
23054 |
+
"step": 3291
|
23055 |
+
},
|
23056 |
+
{
|
23057 |
+
"epoch": 1.9147884251854006,
|
23058 |
+
"grad_norm": NaN,
|
23059 |
+
"learning_rate": 4.712912263701019e-07,
|
23060 |
+
"loss": 0.0,
|
23061 |
+
"step": 3292
|
23062 |
+
},
|
23063 |
+
{
|
23064 |
+
"epoch": 1.9153700741602444,
|
23065 |
+
"grad_norm": NaN,
|
23066 |
+
"learning_rate": 4.6486728038231397e-07,
|
23067 |
+
"loss": 0.0,
|
23068 |
+
"step": 3293
|
23069 |
+
},
|
23070 |
+
{
|
23071 |
+
"epoch": 1.915951723135088,
|
23072 |
+
"grad_norm": NaN,
|
23073 |
+
"learning_rate": 4.58487211743458e-07,
|
23074 |
+
"loss": 0.0,
|
23075 |
+
"step": 3294
|
23076 |
+
},
|
23077 |
+
{
|
23078 |
+
"epoch": 1.9165333721099316,
|
23079 |
+
"grad_norm": NaN,
|
23080 |
+
"learning_rate": 4.521510261048745e-07,
|
23081 |
+
"loss": 0.0,
|
23082 |
+
"step": 3295
|
23083 |
+
},
|
23084 |
+
{
|
23085 |
+
"epoch": 1.9171150210847754,
|
23086 |
+
"grad_norm": NaN,
|
23087 |
+
"learning_rate": 4.4585872907905723e-07,
|
23088 |
+
"loss": 0.0,
|
23089 |
+
"step": 3296
|
23090 |
+
},
|
23091 |
+
{
|
23092 |
+
"epoch": 1.917696670059619,
|
23093 |
+
"grad_norm": NaN,
|
23094 |
+
"learning_rate": 4.396103262396034e-07,
|
23095 |
+
"loss": 0.0,
|
23096 |
+
"step": 3297
|
23097 |
+
},
|
23098 |
+
{
|
23099 |
+
"epoch": 1.9182783190344628,
|
23100 |
+
"grad_norm": NaN,
|
23101 |
+
"learning_rate": 4.3340582312124123e-07,
|
23102 |
+
"loss": 0.0,
|
23103 |
+
"step": 3298
|
23104 |
+
},
|
23105 |
+
{
|
23106 |
+
"epoch": 1.9188599680093064,
|
23107 |
+
"grad_norm": NaN,
|
23108 |
+
"learning_rate": 4.2724522521981337e-07,
|
23109 |
+
"loss": 0.0,
|
23110 |
+
"step": 3299
|
23111 |
+
},
|
23112 |
+
{
|
23113 |
+
"epoch": 1.91944161698415,
|
23114 |
+
"grad_norm": NaN,
|
23115 |
+
"learning_rate": 4.211285379922658e-07,
|
23116 |
+
"loss": 0.0,
|
23117 |
+
"step": 3300
|
23118 |
+
},
|
23119 |
+
{
|
23120 |
+
"epoch": 1.9200232659589938,
|
23121 |
+
"grad_norm": NaN,
|
23122 |
+
"learning_rate": 4.150557668566535e-07,
|
23123 |
+
"loss": 0.0,
|
23124 |
+
"step": 3301
|
23125 |
+
},
|
23126 |
+
{
|
23127 |
+
"epoch": 1.9206049149338376,
|
23128 |
+
"grad_norm": NaN,
|
23129 |
+
"learning_rate": 4.0902691719212903e-07,
|
23130 |
+
"loss": 0.0,
|
23131 |
+
"step": 3302
|
23132 |
+
},
|
23133 |
+
{
|
23134 |
+
"epoch": 1.9211865639086811,
|
23135 |
+
"grad_norm": NaN,
|
23136 |
+
"learning_rate": 4.0304199433894296e-07,
|
23137 |
+
"loss": 0.0,
|
23138 |
+
"step": 3303
|
23139 |
+
},
|
23140 |
+
{
|
23141 |
+
"epoch": 1.9217682128835247,
|
23142 |
+
"grad_norm": NaN,
|
23143 |
+
"learning_rate": 3.9710100359842683e-07,
|
23144 |
+
"loss": 0.0,
|
23145 |
+
"step": 3304
|
23146 |
+
},
|
23147 |
+
{
|
23148 |
+
"epoch": 1.9223498618583683,
|
23149 |
+
"grad_norm": NaN,
|
23150 |
+
"learning_rate": 3.9120395023302114e-07,
|
23151 |
+
"loss": 0.0,
|
23152 |
+
"step": 3305
|
23153 |
+
},
|
23154 |
+
{
|
23155 |
+
"epoch": 1.9229315108332121,
|
23156 |
+
"grad_norm": NaN,
|
23157 |
+
"learning_rate": 3.8535083946621976e-07,
|
23158 |
+
"loss": 0.0,
|
23159 |
+
"step": 3306
|
23160 |
+
},
|
23161 |
+
{
|
23162 |
+
"epoch": 1.923513159808056,
|
23163 |
+
"grad_norm": NaN,
|
23164 |
+
"learning_rate": 3.7954167648260874e-07,
|
23165 |
+
"loss": 0.0,
|
23166 |
+
"step": 3307
|
23167 |
+
},
|
23168 |
+
{
|
23169 |
+
"epoch": 1.9240948087828995,
|
23170 |
+
"grad_norm": NaN,
|
23171 |
+
"learning_rate": 3.737764664278443e-07,
|
23172 |
+
"loss": 0.0,
|
23173 |
+
"step": 3308
|
23174 |
+
},
|
23175 |
+
{
|
23176 |
+
"epoch": 1.924676457757743,
|
23177 |
+
"grad_norm": NaN,
|
23178 |
+
"learning_rate": 3.680552144086469e-07,
|
23179 |
+
"loss": 0.0,
|
23180 |
+
"step": 3309
|
23181 |
+
},
|
23182 |
+
{
|
23183 |
+
"epoch": 1.925258106732587,
|
23184 |
+
"grad_norm": NaN,
|
23185 |
+
"learning_rate": 3.6237792549279613e-07,
|
23186 |
+
"loss": 0.0,
|
23187 |
+
"step": 3310
|
23188 |
+
},
|
23189 |
+
{
|
23190 |
+
"epoch": 1.9258397557074307,
|
23191 |
+
"grad_norm": NaN,
|
23192 |
+
"learning_rate": 3.567446047091416e-07,
|
23193 |
+
"loss": 0.0,
|
23194 |
+
"step": 3311
|
23195 |
+
},
|
23196 |
+
{
|
23197 |
+
"epoch": 1.9264214046822743,
|
23198 |
+
"grad_norm": NaN,
|
23199 |
+
"learning_rate": 3.511552570475807e-07,
|
23200 |
+
"loss": 0.0,
|
23201 |
+
"step": 3312
|
23202 |
+
},
|
23203 |
+
{
|
23204 |
+
"epoch": 1.9270030536571179,
|
23205 |
+
"grad_norm": NaN,
|
23206 |
+
"learning_rate": 3.4560988745904744e-07,
|
23207 |
+
"loss": 0.0,
|
23208 |
+
"step": 3313
|
23209 |
+
},
|
23210 |
+
{
|
23211 |
+
"epoch": 1.9275847026319615,
|
23212 |
+
"grad_norm": NaN,
|
23213 |
+
"learning_rate": 3.40108500855546e-07,
|
23214 |
+
"loss": 0.0,
|
23215 |
+
"step": 3314
|
23216 |
+
},
|
23217 |
+
{
|
23218 |
+
"epoch": 1.9281663516068053,
|
23219 |
+
"grad_norm": NaN,
|
23220 |
+
"learning_rate": 3.3465110211010065e-07,
|
23221 |
+
"loss": 0.0,
|
23222 |
+
"step": 3315
|
23223 |
+
},
|
23224 |
+
{
|
23225 |
+
"epoch": 1.928748000581649,
|
23226 |
+
"grad_norm": NaN,
|
23227 |
+
"learning_rate": 3.2923769605678335e-07,
|
23228 |
+
"loss": 0.0,
|
23229 |
+
"step": 3316
|
23230 |
+
},
|
23231 |
+
{
|
23232 |
+
"epoch": 1.9293296495564927,
|
23233 |
+
"grad_norm": NaN,
|
23234 |
+
"learning_rate": 3.238682874906918e-07,
|
23235 |
+
"loss": 0.0,
|
23236 |
+
"step": 3317
|
23237 |
+
},
|
23238 |
+
{
|
23239 |
+
"epoch": 1.9299112985313363,
|
23240 |
+
"grad_norm": NaN,
|
23241 |
+
"learning_rate": 3.185428811679492e-07,
|
23242 |
+
"loss": 0.0,
|
23243 |
+
"step": 3318
|
23244 |
+
},
|
23245 |
+
{
|
23246 |
+
"epoch": 1.93049294750618,
|
23247 |
+
"grad_norm": NaN,
|
23248 |
+
"learning_rate": 3.132614818057156e-07,
|
23249 |
+
"loss": 0.0,
|
23250 |
+
"step": 3319
|
23251 |
+
},
|
23252 |
+
{
|
23253 |
+
"epoch": 1.9310745964810239,
|
23254 |
+
"grad_norm": NaN,
|
23255 |
+
"learning_rate": 3.0802409408215436e-07,
|
23256 |
+
"loss": 0.0,
|
23257 |
+
"step": 3320
|
23258 |
+
},
|
23259 |
+
{
|
23260 |
+
"epoch": 1.9316562454558674,
|
23261 |
+
"grad_norm": NaN,
|
23262 |
+
"learning_rate": 3.028307226364546e-07,
|
23263 |
+
"loss": 0.0,
|
23264 |
+
"step": 3321
|
23265 |
+
},
|
23266 |
+
{
|
23267 |
+
"epoch": 1.932237894430711,
|
23268 |
+
"grad_norm": NaN,
|
23269 |
+
"learning_rate": 2.976813720688143e-07,
|
23270 |
+
"loss": 0.0,
|
23271 |
+
"step": 3322
|
23272 |
+
},
|
23273 |
+
{
|
23274 |
+
"epoch": 1.9328195434055546,
|
23275 |
+
"grad_norm": NaN,
|
23276 |
+
"learning_rate": 2.925760469404293e-07,
|
23277 |
+
"loss": 0.0,
|
23278 |
+
"step": 3323
|
23279 |
+
},
|
23280 |
+
{
|
23281 |
+
"epoch": 1.9334011923803984,
|
23282 |
+
"grad_norm": NaN,
|
23283 |
+
"learning_rate": 2.8751475177352106e-07,
|
23284 |
+
"loss": 0.0,
|
23285 |
+
"step": 3324
|
23286 |
+
},
|
23287 |
+
{
|
23288 |
+
"epoch": 1.9339828413552422,
|
23289 |
+
"grad_norm": NaN,
|
23290 |
+
"learning_rate": 2.8249749105128673e-07,
|
23291 |
+
"loss": 0.0,
|
23292 |
+
"step": 3325
|
23293 |
+
},
|
23294 |
+
{
|
23295 |
+
"epoch": 1.9345644903300858,
|
23296 |
+
"grad_norm": NaN,
|
23297 |
+
"learning_rate": 2.7752426921792695e-07,
|
23298 |
+
"loss": 0.0,
|
23299 |
+
"step": 3326
|
23300 |
+
},
|
23301 |
+
{
|
23302 |
+
"epoch": 1.9351461393049294,
|
23303 |
+
"grad_norm": NaN,
|
23304 |
+
"learning_rate": 2.7259509067863456e-07,
|
23305 |
+
"loss": 0.0,
|
23306 |
+
"step": 3327
|
23307 |
+
},
|
23308 |
+
{
|
23309 |
+
"epoch": 1.9357277882797732,
|
23310 |
+
"grad_norm": NaN,
|
23311 |
+
"learning_rate": 2.6770995979959467e-07,
|
23312 |
+
"loss": 0.0,
|
23313 |
+
"step": 3328
|
23314 |
+
},
|
23315 |
+
{
|
23316 |
+
"epoch": 1.9363094372546168,
|
23317 |
+
"grad_norm": NaN,
|
23318 |
+
"learning_rate": 2.628688809079682e-07,
|
23319 |
+
"loss": 0.0,
|
23320 |
+
"step": 3329
|
23321 |
+
},
|
23322 |
+
{
|
23323 |
+
"epoch": 1.9368910862294606,
|
23324 |
+
"grad_norm": NaN,
|
23325 |
+
"learning_rate": 2.5807185829189173e-07,
|
23326 |
+
"loss": 0.0,
|
23327 |
+
"step": 3330
|
23328 |
+
},
|
23329 |
+
{
|
23330 |
+
"epoch": 1.9374727352043042,
|
23331 |
+
"grad_norm": NaN,
|
23332 |
+
"learning_rate": 2.533188962004884e-07,
|
23333 |
+
"loss": 0.0,
|
23334 |
+
"step": 3331
|
23335 |
+
},
|
23336 |
+
{
|
23337 |
+
"epoch": 1.9380543841791478,
|
23338 |
+
"grad_norm": NaN,
|
23339 |
+
"learning_rate": 2.486099988438462e-07,
|
23340 |
+
"loss": 0.0,
|
23341 |
+
"step": 3332
|
23342 |
+
},
|
23343 |
+
{
|
23344 |
+
"epoch": 1.9386360331539916,
|
23345 |
+
"grad_norm": NaN,
|
23346 |
+
"learning_rate": 2.4394517039302844e-07,
|
23347 |
+
"loss": 0.0,
|
23348 |
+
"step": 3333
|
23349 |
+
},
|
23350 |
+
{
|
23351 |
+
"epoch": 1.9392176821288354,
|
23352 |
+
"grad_norm": NaN,
|
23353 |
+
"learning_rate": 2.3932441498005775e-07,
|
23354 |
+
"loss": 0.0,
|
23355 |
+
"step": 3334
|
23356 |
+
},
|
23357 |
+
{
|
23358 |
+
"epoch": 1.939799331103679,
|
23359 |
+
"grad_norm": NaN,
|
23360 |
+
"learning_rate": 2.3474773669791007e-07,
|
23361 |
+
"loss": 0.0,
|
23362 |
+
"step": 3335
|
23363 |
+
},
|
23364 |
+
{
|
23365 |
+
"epoch": 1.9403809800785226,
|
23366 |
+
"grad_norm": NaN,
|
23367 |
+
"learning_rate": 2.3021513960053143e-07,
|
23368 |
+
"loss": 0.0,
|
23369 |
+
"step": 3336
|
23370 |
+
},
|
23371 |
+
{
|
23372 |
+
"epoch": 1.9409626290533661,
|
23373 |
+
"grad_norm": NaN,
|
23374 |
+
"learning_rate": 2.2572662770281584e-07,
|
23375 |
+
"loss": 0.0,
|
23376 |
+
"step": 3337
|
23377 |
+
},
|
23378 |
+
{
|
23379 |
+
"epoch": 1.94154427802821,
|
23380 |
+
"grad_norm": NaN,
|
23381 |
+
"learning_rate": 2.2128220498061069e-07,
|
23382 |
+
"loss": 0.0,
|
23383 |
+
"step": 3338
|
23384 |
+
},
|
23385 |
+
{
|
23386 |
+
"epoch": 1.9421259270030538,
|
23387 |
+
"grad_norm": NaN,
|
23388 |
+
"learning_rate": 2.1688187537070581e-07,
|
23389 |
+
"loss": 0.0,
|
23390 |
+
"step": 3339
|
23391 |
+
},
|
23392 |
+
{
|
23393 |
+
"epoch": 1.9427075759778973,
|
23394 |
+
"grad_norm": NaN,
|
23395 |
+
"learning_rate": 2.125256427708333e-07,
|
23396 |
+
"loss": 0.0,
|
23397 |
+
"step": 3340
|
23398 |
+
},
|
23399 |
+
{
|
23400 |
+
"epoch": 1.943289224952741,
|
23401 |
+
"grad_norm": NaN,
|
23402 |
+
"learning_rate": 2.0821351103966768e-07,
|
23403 |
+
"loss": 0.0,
|
23404 |
+
"step": 3341
|
23405 |
+
},
|
23406 |
+
{
|
23407 |
+
"epoch": 1.9438708739275847,
|
23408 |
+
"grad_norm": NaN,
|
23409 |
+
"learning_rate": 2.0394548399682024e-07,
|
23410 |
+
"loss": 0.0,
|
23411 |
+
"step": 3342
|
23412 |
+
},
|
23413 |
+
{
|
23414 |
+
"epoch": 1.9444525229024285,
|
23415 |
+
"grad_norm": NaN,
|
23416 |
+
"learning_rate": 1.9972156542283904e-07,
|
23417 |
+
"loss": 0.0,
|
23418 |
+
"step": 3343
|
23419 |
+
},
|
23420 |
+
{
|
23421 |
+
"epoch": 1.9450341718772721,
|
23422 |
+
"grad_norm": NaN,
|
23423 |
+
"learning_rate": 1.9554175905919237e-07,
|
23424 |
+
"loss": 0.0,
|
23425 |
+
"step": 3344
|
23426 |
+
},
|
23427 |
+
{
|
23428 |
+
"epoch": 1.9456158208521157,
|
23429 |
+
"grad_norm": NaN,
|
23430 |
+
"learning_rate": 1.9140606860827969e-07,
|
23431 |
+
"loss": 0.0,
|
23432 |
+
"step": 3345
|
23433 |
+
},
|
23434 |
+
{
|
23435 |
+
"epoch": 1.9461974698269593,
|
23436 |
+
"grad_norm": NaN,
|
23437 |
+
"learning_rate": 1.8731449773342625e-07,
|
23438 |
+
"loss": 0.0,
|
23439 |
+
"step": 3346
|
23440 |
}
|
23441 |
],
|
23442 |
"logging_steps": 1,
|
|
|
23456 |
"attributes": {}
|
23457 |
}
|
23458 |
},
|
23459 |
+
"total_flos": 3.118121276669952e+16,
|
23460 |
"train_batch_size": 2,
|
23461 |
"trial_name": null,
|
23462 |
"trial_params": null
|