Training in progress, step 1912, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2e893d538403d4ac222e2baaf746a33535ee8031c07cf1939cc3355ea15106a0
 size 2503003904
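The binary checkpoint files in this commit are stored through Git LFS, so the diffs here compare pointer files rather than the weights themselves: a pointer is three text lines giving the spec version, the SHA-256 of the blob, and its size in bytes. A minimal sketch of reading one such pointer (the parsing helper is illustrative, not part of this repo):

```python
# Minimal Git LFS pointer parser for the three-line format shown above:
#   version <url> / oid sha256:<hex> / size <bytes>
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:2e893d538403d4ac222e2baaf746a33535ee8031c07cf1939cc3355ea15106a0
size 2503003904
"""
info = parse_lfs_pointer(pointer)
print(info["sha256"][:12], info["size_bytes"])  # new adapter blob, ~2.5 GB
```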
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dbf245997dbc83cd89bcfb5067dfa742724b5f13ff1993cd0ad6d3d60a4c987a
 size 5006244836
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:12f09fa1a152c2febaa1b0be3c98d7abd70a22c5965d994af5b7173cc3e6ff7f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c767cedc54b733779ba8a20f635d848598fd89e5cfee0706f6c63df8c1e6b2d8
 size 1064
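Taken together, these files are what the Hugging Face transformers Trainer writes on each save and what it needs to pick the run back up: adapter_model.safetensors holds the (PEFT-style) adapter weights, optimizer.pt the optimizer moments, scheduler.pt the learning-rate schedule, and rng_state.pth the RNG state that keeps data ordering reproducible. A hedged sketch of resuming from this folder; `model` and `train_dataset` are placeholders for whatever produced this run, and only the resume call itself is the point:

```python
from transformers import Trainer, TrainingArguments

def resume_training(model, train_dataset, checkpoint_dir="last-checkpoint"):
    # Values mirror the trainer_state.json diff below:
    # per-device batch size 8, logging every step.
    args = TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=8,
        logging_steps=1,
    )
    trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
    # Reloads the weights plus optimizer.pt, scheduler.pt, and rng_state.pth,
    # then continues from global step 1912 instead of restarting.
    trainer.train(resume_from_checkpoint=checkpoint_dir)
    return trainer
```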
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9775051124744376,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1912,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11718,6 +11718,1679 @@
       "learning_rate": 5.6277671951738716e-06,
       "loss": 0.7867,
       "step": 1673
+    },
+    {
+      "epoch": 0.8558282208588958,
+      "grad_norm": 3.041142225265503,
+      "learning_rate": 5.588822025878476e-06,
+      "loss": 0.7922,
+      "step": 1674
+    },
+    {
+      "epoch": 0.8563394683026585,
+      "grad_norm": 3.526838779449463,
+      "learning_rate": 5.550004100195639e-06,
+      "loss": 0.9025,
+      "step": 1675
+    },
+    {
+      "epoch": 0.8568507157464212,
+      "grad_norm": 3.6863765716552734,
+      "learning_rate": 5.5113135293435815e-06,
+      "loss": 0.9744,
+      "step": 1676
+    },
+    {
+      "epoch": 0.8573619631901841,
+      "grad_norm": 3.2295031547546387,
+      "learning_rate": 5.4727504241756874e-06,
+      "loss": 0.8475,
+      "step": 1677
+    },
+    {
+      "epoch": 0.8578732106339468,
+      "grad_norm": 3.341581344604492,
+      "learning_rate": 5.434314895180082e-06,
+      "loss": 0.8515,
+      "step": 1678
+    },
+    {
+      "epoch": 0.8583844580777096,
+      "grad_norm": 3.328876495361328,
+      "learning_rate": 5.396007052479407e-06,
+      "loss": 0.8321,
+      "step": 1679
+    },
+    {
+      "epoch": 0.8588957055214724,
+      "grad_norm": 3.497668743133545,
+      "learning_rate": 5.357827005830435e-06,
+      "loss": 0.8929,
+      "step": 1680
+    },
+    {
+      "epoch": 0.8594069529652352,
+      "grad_norm": 3.690748691558838,
+      "learning_rate": 5.319774864623834e-06,
+      "loss": 0.8603,
+      "step": 1681
+    },
+    {
+      "epoch": 0.8599182004089979,
+      "grad_norm": 3.709012508392334,
+      "learning_rate": 5.281850737883731e-06,
+      "loss": 0.9677,
+      "step": 1682
+    },
+    {
+      "epoch": 0.8604294478527608,
+      "grad_norm": 3.8703484535217285,
+      "learning_rate": 5.2440547342675614e-06,
+      "loss": 0.8876,
+      "step": 1683
+    },
+    {
+      "epoch": 0.8609406952965235,
+      "grad_norm": 3.9959239959716797,
+      "learning_rate": 5.206386962065602e-06,
+      "loss": 0.791,
+      "step": 1684
+    },
+    {
+      "epoch": 0.8614519427402862,
+      "grad_norm": 3.8929603099823,
+      "learning_rate": 5.168847529200782e-06,
+      "loss": 0.8654,
+      "step": 1685
+    },
+    {
+      "epoch": 0.8619631901840491,
+      "grad_norm": 4.002877235412598,
+      "learning_rate": 5.1314365432282904e-06,
+      "loss": 0.8284,
+      "step": 1686
+    },
+    {
+      "epoch": 0.8624744376278118,
+      "grad_norm": 4.066626071929932,
+      "learning_rate": 5.094154111335292e-06,
+      "loss": 0.8603,
+      "step": 1687
+    },
+    {
+      "epoch": 0.8629856850715747,
+      "grad_norm": 4.232320785522461,
+      "learning_rate": 5.057000340340678e-06,
+      "loss": 0.9426,
+      "step": 1688
+    },
+    {
+      "epoch": 0.8634969325153374,
+      "grad_norm": 3.965895175933838,
+      "learning_rate": 5.019975336694649e-06,
+      "loss": 0.7798,
+      "step": 1689
+    },
+    {
+      "epoch": 0.8640081799591002,
+      "grad_norm": 4.249181270599365,
+      "learning_rate": 4.983079206478513e-06,
+      "loss": 0.8211,
+      "step": 1690
+    },
+    {
+      "epoch": 0.864519427402863,
+      "grad_norm": 4.230957508087158,
+      "learning_rate": 4.946312055404328e-06,
+      "loss": 0.9142,
+      "step": 1691
+    },
+    {
+      "epoch": 0.8650306748466258,
+      "grad_norm": 4.479368209838867,
+      "learning_rate": 4.909673988814601e-06,
+      "loss": 0.9803,
+      "step": 1692
+    },
+    {
+      "epoch": 0.8655419222903885,
+      "grad_norm": 4.021700859069824,
+      "learning_rate": 4.873165111681993e-06,
+      "loss": 0.7705,
+      "step": 1693
+    },
+    {
+      "epoch": 0.8660531697341514,
+      "grad_norm": 4.131351470947266,
+      "learning_rate": 4.836785528609051e-06,
+      "loss": 0.7206,
+      "step": 1694
+    },
+    {
+      "epoch": 0.8665644171779141,
+      "grad_norm": 4.708834171295166,
+      "learning_rate": 4.800535343827833e-06,
+      "loss": 0.8698,
+      "step": 1695
+    },
+    {
+      "epoch": 0.8670756646216768,
+      "grad_norm": 4.927844047546387,
+      "learning_rate": 4.764414661199707e-06,
+      "loss": 0.7615,
+      "step": 1696
+    },
+    {
+      "epoch": 0.8675869120654397,
+      "grad_norm": 5.510052680969238,
+      "learning_rate": 4.728423584214947e-06,
+      "loss": 0.9676,
+      "step": 1697
+    },
+    {
+      "epoch": 0.8680981595092024,
+      "grad_norm": 5.557268142700195,
+      "learning_rate": 4.692562215992541e-06,
+      "loss": 0.6631,
+      "step": 1698
+    },
+    {
+      "epoch": 0.8686094069529653,
+      "grad_norm": 6.219789028167725,
+      "learning_rate": 4.656830659279804e-06,
+      "loss": 0.8028,
+      "step": 1699
+    },
+    {
+      "epoch": 0.869120654396728,
+      "grad_norm": 5.629024028778076,
+      "learning_rate": 4.621229016452156e-06,
+      "loss": 0.4033,
+      "step": 1700
+    },
+    {
+      "epoch": 0.8696319018404908,
+      "grad_norm": 1.9563968181610107,
+      "learning_rate": 4.585757389512768e-06,
+      "loss": 0.8627,
+      "step": 1701
+    },
+    {
+      "epoch": 0.8701431492842536,
+      "grad_norm": 2.113006591796875,
+      "learning_rate": 4.550415880092313e-06,
+      "loss": 0.8872,
+      "step": 1702
+    },
+    {
+      "epoch": 0.8706543967280164,
+      "grad_norm": 2.489034652709961,
+      "learning_rate": 4.515204589448674e-06,
+      "loss": 0.9124,
+      "step": 1703
+    },
+    {
+      "epoch": 0.8711656441717791,
+      "grad_norm": 2.6170196533203125,
+      "learning_rate": 4.48012361846662e-06,
+      "loss": 1.0461,
+      "step": 1704
+    },
+    {
+      "epoch": 0.871676891615542,
+      "grad_norm": 2.774226188659668,
+      "learning_rate": 4.445173067657554e-06,
+      "loss": 1.0186,
+      "step": 1705
+    },
+    {
+      "epoch": 0.8721881390593047,
+      "grad_norm": 2.7719779014587402,
+      "learning_rate": 4.410353037159193e-06,
+      "loss": 0.9851,
+      "step": 1706
+    },
+    {
+      "epoch": 0.8726993865030674,
+      "grad_norm": 2.8371808528900146,
+      "learning_rate": 4.3756636267353214e-06,
+      "loss": 1.0079,
+      "step": 1707
+    },
+    {
+      "epoch": 0.8732106339468303,
+      "grad_norm": 3.129444122314453,
+      "learning_rate": 4.341104935775442e-06,
+      "loss": 0.8973,
+      "step": 1708
+    },
+    {
+      "epoch": 0.873721881390593,
+      "grad_norm": 2.850924491882324,
+      "learning_rate": 4.306677063294573e-06,
+      "loss": 0.918,
+      "step": 1709
+    },
+    {
+      "epoch": 0.8742331288343558,
+      "grad_norm": 2.839162826538086,
+      "learning_rate": 4.272380107932888e-06,
+      "loss": 0.9134,
+      "step": 1710
+    },
+    {
+      "epoch": 0.8747443762781186,
+      "grad_norm": 3.149712562561035,
+      "learning_rate": 4.238214167955484e-06,
+      "loss": 0.9679,
+      "step": 1711
+    },
+    {
+      "epoch": 0.8752556237218814,
+      "grad_norm": 2.945416212081909,
+      "learning_rate": 4.2041793412520734e-06,
+      "loss": 0.8602,
+      "step": 1712
+    },
+    {
+      "epoch": 0.8757668711656442,
+      "grad_norm": 3.0397250652313232,
+      "learning_rate": 4.17027572533672e-06,
+      "loss": 0.9156,
+      "step": 1713
+    },
+    {
+      "epoch": 0.876278118609407,
+      "grad_norm": 3.256863832473755,
+      "learning_rate": 4.136503417347554e-06,
+      "loss": 0.7461,
+      "step": 1714
+    },
+    {
+      "epoch": 0.8767893660531697,
+      "grad_norm": 3.1958820819854736,
+      "learning_rate": 4.102862514046474e-06,
+      "loss": 0.8761,
+      "step": 1715
+    },
+    {
+      "epoch": 0.8773006134969326,
+      "grad_norm": 3.4146556854248047,
+      "learning_rate": 4.069353111818913e-06,
+      "loss": 0.9106,
+      "step": 1716
+    },
+    {
+      "epoch": 0.8778118609406953,
+      "grad_norm": 3.057286024093628,
+      "learning_rate": 4.035975306673517e-06,
+      "loss": 0.8755,
+      "step": 1717
+    },
+    {
+      "epoch": 0.878323108384458,
+      "grad_norm": 3.0085763931274414,
+      "learning_rate": 4.0027291942419055e-06,
+      "loss": 0.7805,
+      "step": 1718
+    },
+    {
+      "epoch": 0.8788343558282209,
+      "grad_norm": 3.2898752689361572,
+      "learning_rate": 3.969614869778354e-06,
+      "loss": 0.8877,
+      "step": 1719
+    },
+    {
+      "epoch": 0.8793456032719836,
+      "grad_norm": 3.1025335788726807,
+      "learning_rate": 3.936632428159609e-06,
+      "loss": 0.808,
+      "step": 1720
+    },
+    {
+      "epoch": 0.8798568507157464,
+      "grad_norm": 3.2930991649627686,
+      "learning_rate": 3.903781963884467e-06,
+      "loss": 0.8736,
+      "step": 1721
+    },
+    {
+      "epoch": 0.8803680981595092,
+      "grad_norm": 3.3883445262908936,
+      "learning_rate": 3.871063571073668e-06,
+      "loss": 0.9969,
+      "step": 1722
+    },
+    {
+      "epoch": 0.880879345603272,
+      "grad_norm": 3.6220452785491943,
+      "learning_rate": 3.838477343469516e-06,
+      "loss": 0.88,
+      "step": 1723
+    },
+    {
+      "epoch": 0.8813905930470347,
+      "grad_norm": 3.937267303466797,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.9832,
+      "step": 1724
+    },
+    {
+      "epoch": 0.8819018404907976,
+      "grad_norm": 3.5126397609710693,
+      "learning_rate": 3.77370175695681e-06,
+      "loss": 0.8513,
+      "step": 1725
+    },
+    {
+      "epoch": 0.8824130879345603,
+      "grad_norm": 3.5317177772521973,
+      "learning_rate": 3.74151258363844e-06,
+      "loss": 0.8387,
+      "step": 1726
+    },
+    {
+      "epoch": 0.8829243353783232,
+      "grad_norm": 3.232076406478882,
+      "learning_rate": 3.7094559467066083e-06,
+      "loss": 0.8725,
+      "step": 1727
+    },
+    {
+      "epoch": 0.8834355828220859,
+      "grad_norm": 3.6095595359802246,
+      "learning_rate": 3.6775319380076e-06,
+      "loss": 0.9068,
+      "step": 1728
+    },
+    {
+      "epoch": 0.8839468302658486,
+      "grad_norm": 3.2999720573425293,
+      "learning_rate": 3.645740649007734e-06,
+      "loss": 0.806,
+      "step": 1729
+    },
+    {
+      "epoch": 0.8844580777096115,
+      "grad_norm": 3.733455181121826,
+      "learning_rate": 3.614082170793021e-06,
+      "loss": 0.8415,
+      "step": 1730
+    },
+    {
+      "epoch": 0.8849693251533742,
+      "grad_norm": 3.5478620529174805,
+      "learning_rate": 3.5825565940690087e-06,
+      "loss": 0.8471,
+      "step": 1731
+    },
+    {
+      "epoch": 0.885480572597137,
+      "grad_norm": 3.889519214630127,
+      "learning_rate": 3.551164009160429e-06,
+      "loss": 0.8128,
+      "step": 1732
+    },
+    {
+      "epoch": 0.8859918200408998,
+      "grad_norm": 3.7842485904693604,
+      "learning_rate": 3.5199045060110013e-06,
+      "loss": 0.9556,
+      "step": 1733
+    },
+    {
+      "epoch": 0.8865030674846626,
+      "grad_norm": 3.5816397666931152,
+      "learning_rate": 3.488778174183116e-06,
+      "loss": 0.8108,
+      "step": 1734
+    },
+    {
+      "epoch": 0.8870143149284253,
+      "grad_norm": 3.812117338180542,
+      "learning_rate": 3.4577851028576523e-06,
+      "loss": 0.7997,
+      "step": 1735
+    },
+    {
+      "epoch": 0.8875255623721882,
+      "grad_norm": 3.7888643741607666,
+      "learning_rate": 3.4269253808336455e-06,
+      "loss": 0.8451,
+      "step": 1736
+    },
+    {
+      "epoch": 0.8880368098159509,
+      "grad_norm": 3.8936595916748047,
+      "learning_rate": 3.3961990965280745e-06,
+      "loss": 0.8154,
+      "step": 1737
+    },
+    {
+      "epoch": 0.8885480572597138,
+      "grad_norm": 4.134406566619873,
+      "learning_rate": 3.36560633797563e-06,
+      "loss": 0.8866,
+      "step": 1738
+    },
+    {
+      "epoch": 0.8890593047034765,
+      "grad_norm": 4.125121116638184,
+      "learning_rate": 3.335147192828403e-06,
+      "loss": 0.7727,
+      "step": 1739
+    },
+    {
+      "epoch": 0.8895705521472392,
+      "grad_norm": 4.411681175231934,
+      "learning_rate": 3.3048217483556744e-06,
+      "loss": 0.8563,
+      "step": 1740
+    },
+    {
+      "epoch": 0.8900817995910021,
+      "grad_norm": 3.7549989223480225,
+      "learning_rate": 3.2746300914436534e-06,
+      "loss": 0.7357,
+      "step": 1741
+    },
+    {
+      "epoch": 0.8905930470347648,
+      "grad_norm": 4.538980960845947,
+      "learning_rate": 3.2445723085952504e-06,
+      "loss": 0.818,
+      "step": 1742
+    },
+    {
+      "epoch": 0.8911042944785276,
+      "grad_norm": 4.389502048492432,
+      "learning_rate": 3.214648485929783e-06,
+      "loss": 0.8799,
+      "step": 1743
+    },
+    {
+      "epoch": 0.8916155419222904,
+      "grad_norm": 4.548320293426514,
+      "learning_rate": 3.184858709182775e-06,
+      "loss": 0.6832,
+      "step": 1744
+    },
+    {
+      "epoch": 0.8921267893660532,
+      "grad_norm": 4.614025115966797,
+      "learning_rate": 3.1552030637056806e-06,
+      "loss": 0.8305,
+      "step": 1745
+    },
+    {
+      "epoch": 0.8926380368098159,
+      "grad_norm": 4.867095470428467,
+      "learning_rate": 3.1256816344656602e-06,
+      "loss": 0.8145,
+      "step": 1746
+    },
+    {
+      "epoch": 0.8931492842535788,
+      "grad_norm": 4.788604259490967,
+      "learning_rate": 3.096294506045311e-06,
+      "loss": 0.8077,
+      "step": 1747
+    },
+    {
+      "epoch": 0.8936605316973415,
+      "grad_norm": 5.478014945983887,
+      "learning_rate": 3.067041762642475e-06,
+      "loss": 0.771,
+      "step": 1748
+    },
+    {
+      "epoch": 0.8941717791411042,
+      "grad_norm": 6.572366237640381,
+      "learning_rate": 3.037923488069927e-06,
+      "loss": 0.7604,
+      "step": 1749
+    },
+    {
+      "epoch": 0.8946830265848671,
+      "grad_norm": 5.753463268280029,
+      "learning_rate": 3.0089397657551865e-06,
+      "loss": 0.4072,
+      "step": 1750
+    },
+    {
+      "epoch": 0.8951942740286298,
+      "grad_norm": 1.9616332054138184,
+      "learning_rate": 2.9800906787402716e-06,
+      "loss": 0.9257,
+      "step": 1751
+    },
+    {
+      "epoch": 0.8957055214723927,
+      "grad_norm": 2.392256259918213,
+      "learning_rate": 2.9513763096814305e-06,
+      "loss": 1.0013,
+      "step": 1752
+    },
+    {
+      "epoch": 0.8962167689161554,
+      "grad_norm": 2.3039324283599854,
+      "learning_rate": 2.9227967408489653e-06,
+      "loss": 0.9961,
+      "step": 1753
+    },
+    {
+      "epoch": 0.8967280163599182,
+      "grad_norm": 2.534642457962036,
+      "learning_rate": 2.89435205412692e-06,
+      "loss": 0.9289,
+      "step": 1754
+    },
+    {
+      "epoch": 0.897239263803681,
+      "grad_norm": 2.941516160964966,
+      "learning_rate": 2.8660423310129135e-06,
+      "loss": 0.957,
+      "step": 1755
+    },
+    {
+      "epoch": 0.8977505112474438,
+      "grad_norm": 2.806290626525879,
+      "learning_rate": 2.8378676526178482e-06,
+      "loss": 1.0622,
+      "step": 1756
+    },
+    {
+      "epoch": 0.8982617586912065,
+      "grad_norm": 2.6641461849212646,
+      "learning_rate": 2.8098280996657456e-06,
+      "loss": 0.9309,
+      "step": 1757
+    },
+    {
+      "epoch": 0.8987730061349694,
+      "grad_norm": 2.6802642345428467,
+      "learning_rate": 2.781923752493437e-06,
+      "loss": 0.8945,
+      "step": 1758
+    },
+    {
+      "epoch": 0.8992842535787321,
+      "grad_norm": 2.851447820663452,
+      "learning_rate": 2.754154691050387e-06,
+      "loss": 0.8722,
+      "step": 1759
+    },
+    {
+      "epoch": 0.8997955010224948,
+      "grad_norm": 2.958563804626465,
+      "learning_rate": 2.7265209948984514e-06,
+      "loss": 0.8529,
+      "step": 1760
+    },
+    {
+      "epoch": 0.9003067484662577,
+      "grad_norm": 2.867089033126831,
+      "learning_rate": 2.6990227432116544e-06,
+      "loss": 0.8957,
+      "step": 1761
+    },
+    {
+      "epoch": 0.9008179959100204,
+      "grad_norm": 3.3198704719543457,
+      "learning_rate": 2.671660014775934e-06,
+      "loss": 0.8905,
+      "step": 1762
+    },
+    {
+      "epoch": 0.9013292433537833,
+      "grad_norm": 3.0108888149261475,
+      "learning_rate": 2.6444328879889622e-06,
+      "loss": 0.8434,
+      "step": 1763
+    },
+    {
+      "epoch": 0.901840490797546,
+      "grad_norm": 2.9155540466308594,
+      "learning_rate": 2.6173414408598827e-06,
+      "loss": 0.8414,
+      "step": 1764
+    },
+    {
+      "epoch": 0.9023517382413088,
+      "grad_norm": 3.178889036178589,
+      "learning_rate": 2.5903857510090835e-06,
+      "loss": 0.9461,
+      "step": 1765
+    },
+    {
+      "epoch": 0.9028629856850716,
+      "grad_norm": 3.119640588760376,
+      "learning_rate": 2.56356589566803e-06,
+      "loss": 0.8996,
+      "step": 1766
+    },
+    {
+      "epoch": 0.9033742331288344,
+      "grad_norm": 3.1831552982330322,
+      "learning_rate": 2.53688195167896e-06,
+      "loss": 0.9139,
+      "step": 1767
+    },
+    {
+      "epoch": 0.9038854805725971,
+      "grad_norm": 3.3082258701324463,
+      "learning_rate": 2.5103339954947626e-06,
+      "loss": 0.8465,
+      "step": 1768
+    },
+    {
+      "epoch": 0.90439672801636,
+      "grad_norm": 3.1402430534362793,
+      "learning_rate": 2.483922103178632e-06,
+      "loss": 0.9071,
+      "step": 1769
+    },
+    {
+      "epoch": 0.9049079754601227,
+      "grad_norm": 3.3881165981292725,
+      "learning_rate": 2.4576463504039913e-06,
+      "loss": 0.9479,
+      "step": 1770
+    },
+    {
+      "epoch": 0.9054192229038854,
+      "grad_norm": 3.3849008083343506,
+      "learning_rate": 2.4315068124541597e-06,
+      "loss": 0.8833,
+      "step": 1771
+    },
+    {
+      "epoch": 0.9059304703476483,
+      "grad_norm": 3.725773334503174,
+      "learning_rate": 2.4055035642222224e-06,
+      "loss": 0.8946,
+      "step": 1772
+    },
+    {
+      "epoch": 0.906441717791411,
+      "grad_norm": 4.276130676269531,
+      "learning_rate": 2.3796366802107394e-06,
+      "loss": 0.9649,
+      "step": 1773
+    },
+    {
+      "epoch": 0.9069529652351738,
+      "grad_norm": 3.543367862701416,
+      "learning_rate": 2.3539062345316e-06,
+      "loss": 0.8461,
+      "step": 1774
+    },
+    {
+      "epoch": 0.9074642126789366,
+      "grad_norm": 3.076871156692505,
+      "learning_rate": 2.3283123009057607e-06,
+      "loss": 0.8243,
+      "step": 1775
+    },
+    {
+      "epoch": 0.9079754601226994,
+      "grad_norm": 3.6242263317108154,
+      "learning_rate": 2.3028549526630583e-06,
+      "loss": 0.8345,
+      "step": 1776
+    },
+    {
+      "epoch": 0.9084867075664622,
+      "grad_norm": 3.5291264057159424,
+      "learning_rate": 2.277534262742015e-06,
+      "loss": 0.892,
+      "step": 1777
+    },
+    {
+      "epoch": 0.908997955010225,
+      "grad_norm": 3.4097137451171875,
+      "learning_rate": 2.2523503036895764e-06,
+      "loss": 0.8539,
+      "step": 1778
+    },
+    {
+      "epoch": 0.9095092024539877,
+      "grad_norm": 3.7175967693328857,
+      "learning_rate": 2.227303147660964e-06,
+      "loss": 0.9083,
+      "step": 1779
+    },
+    {
+      "epoch": 0.9100204498977505,
+      "grad_norm": 3.9345943927764893,
+      "learning_rate": 2.202392866419423e-06,
+      "loss": 0.9569,
+      "step": 1780
+    },
+    {
+      "epoch": 0.9105316973415133,
+      "grad_norm": 3.6897552013397217,
+      "learning_rate": 2.1776195313360505e-06,
+      "loss": 0.8444,
+      "step": 1781
+    },
+    {
+      "epoch": 0.911042944785276,
+      "grad_norm": 3.9733729362487793,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 0.9426,
+      "step": 1782
+    },
+    {
+      "epoch": 0.9115541922290389,
+      "grad_norm": 4.125808238983154,
+      "learning_rate": 2.1284839831661075e-06,
+      "loss": 0.8886,
+      "step": 1783
+    },
+    {
+      "epoch": 0.9120654396728016,
+      "grad_norm": 3.4890663623809814,
+      "learning_rate": 2.1041219108590692e-06,
+      "loss": 0.8138,
+      "step": 1784
+    },
+    {
+      "epoch": 0.9125766871165644,
+      "grad_norm": 3.9719114303588867,
+      "learning_rate": 2.0798970662688545e-06,
+      "loss": 0.8747,
+      "step": 1785
+    },
+    {
+      "epoch": 0.9130879345603272,
+      "grad_norm": 4.040510654449463,
+      "learning_rate": 2.055809518802676e-06,
+      "loss": 1.0,
+      "step": 1786
+    },
+    {
+      "epoch": 0.91359918200409,
+      "grad_norm": 4.081876754760742,
+      "learning_rate": 2.031859337474407e-06,
+      "loss": 0.7761,
+      "step": 1787
+    },
+    {
+      "epoch": 0.9141104294478528,
+      "grad_norm": 4.227837562561035,
+      "learning_rate": 2.0080465909043113e-06,
+      "loss": 0.8716,
+      "step": 1788
+    },
+    {
+      "epoch": 0.9146216768916156,
+      "grad_norm": 4.269420623779297,
+      "learning_rate": 1.984371347318914e-06,
+      "loss": 0.8066,
+      "step": 1789
+    },
+    {
+      "epoch": 0.9151329243353783,
+      "grad_norm": 4.166466236114502,
+      "learning_rate": 1.9608336745507716e-06,
+      "loss": 0.802,
+      "step": 1790
+    },
+    {
+      "epoch": 0.9156441717791411,
+      "grad_norm": 4.287985801696777,
+      "learning_rate": 1.937433640038261e-06,
+      "loss": 0.7958,
+      "step": 1791
+    },
+    {
+      "epoch": 0.9161554192229039,
+      "grad_norm": 4.09846830368042,
+      "learning_rate": 1.914171310825441e-06,
+      "loss": 0.7164,
+      "step": 1792
+    },
+    {
+      "epoch": 0.9166666666666666,
+      "grad_norm": 4.546944618225098,
+      "learning_rate": 1.8910467535617983e-06,
+      "loss": 0.7325,
+      "step": 1793
+    },
+    {
+      "epoch": 0.9171779141104295,
+      "grad_norm": 4.4474897384643555,
+      "learning_rate": 1.8680600345021171e-06,
+      "loss": 0.7291,
+      "step": 1794
+    },
+    {
+      "epoch": 0.9176891615541922,
+      "grad_norm": 4.722184658050537,
+      "learning_rate": 1.845211219506221e-06,
+      "loss": 0.6715,
+      "step": 1795
+    },
+    {
+      "epoch": 0.918200408997955,
+      "grad_norm": 5.06058931350708,
+      "learning_rate": 1.8225003740388547e-06,
+      "loss": 0.9135,
+      "step": 1796
+    },
+    {
+      "epoch": 0.9187116564417178,
+      "grad_norm": 5.043420791625977,
+      "learning_rate": 1.79992756316944e-06,
+      "loss": 0.7258,
+      "step": 1797
+    },
+    {
+      "epoch": 0.9192229038854806,
+      "grad_norm": 5.176082134246826,
+      "learning_rate": 1.7774928515719157e-06,
+      "loss": 0.7435,
+      "step": 1798
+    },
+    {
+      "epoch": 0.9197341513292433,
+      "grad_norm": 6.064294815063477,
+      "learning_rate": 1.7551963035245588e-06,
+      "loss": 0.7142,
+      "step": 1799
+    },
+    {
+      "epoch": 0.9202453987730062,
+      "grad_norm": 6.77680778503418,
+      "learning_rate": 1.733037982909791e-06,
+      "loss": 0.4713,
+      "step": 1800
+    },
+    {
+      "epoch": 0.9207566462167689,
+      "grad_norm": 1.9696067571640015,
+      "learning_rate": 1.7110179532139781e-06,
+      "loss": 0.8423,
+      "step": 1801
+    },
+    {
+      "epoch": 0.9212678936605317,
+      "grad_norm": 2.4090499877929688,
+      "learning_rate": 1.6891362775272812e-06,
+      "loss": 0.9596,
+      "step": 1802
+    },
+    {
+      "epoch": 0.9217791411042945,
+      "grad_norm": 2.441483974456787,
+      "learning_rate": 1.6673930185434561e-06,
+      "loss": 0.8638,
+      "step": 1803
+    },
+    {
+      "epoch": 0.9222903885480572,
+      "grad_norm": 2.631037712097168,
+      "learning_rate": 1.6457882385596646e-06,
+      "loss": 1.0107,
+      "step": 1804
+    },
+    {
+      "epoch": 0.9228016359918201,
+      "grad_norm": 2.69331431388855,
+      "learning_rate": 1.6243219994763304e-06,
+      "loss": 0.9641,
+      "step": 1805
+    },
+    {
+      "epoch": 0.9233128834355828,
+      "grad_norm": 2.4592180252075195,
+      "learning_rate": 1.6029943627969223e-06,
+      "loss": 0.8723,
+      "step": 1806
+    },
+    {
+      "epoch": 0.9238241308793456,
+      "grad_norm": 2.729130744934082,
+      "learning_rate": 1.5818053896278162e-06,
+      "loss": 0.9765,
+      "step": 1807
+    },
+    {
+      "epoch": 0.9243353783231084,
+      "grad_norm": 2.6787896156311035,
+      "learning_rate": 1.5607551406780717e-06,
+      "loss": 0.8601,
+      "step": 1808
+    },
+    {
+      "epoch": 0.9248466257668712,
+      "grad_norm": 2.9896671772003174,
+      "learning_rate": 1.5398436762593061e-06,
+      "loss": 0.8525,
+      "step": 1809
+    },
+    {
+      "epoch": 0.9253578732106339,
+      "grad_norm": 3.0496749877929688,
+      "learning_rate": 1.519071056285487e-06,
+      "loss": 0.9679,
+      "step": 1810
+    },
+    {
+      "epoch": 0.9258691206543967,
+      "grad_norm": 2.976200819015503,
+      "learning_rate": 1.4984373402728014e-06,
+      "loss": 0.8789,
+      "step": 1811
+    },
+    {
+      "epoch": 0.9263803680981595,
+      "grad_norm": 3.0850539207458496,
+      "learning_rate": 1.4779425873394259e-06,
+      "loss": 0.85,
+      "step": 1812
+    },
+    {
+      "epoch": 0.9268916155419223,
+      "grad_norm": 3.117995023727417,
+      "learning_rate": 1.4575868562054228e-06,
+      "loss": 0.8234,
+      "step": 1813
+    },
+    {
+      "epoch": 0.9274028629856851,
+      "grad_norm": 2.9029812812805176,
+      "learning_rate": 1.4373702051925065e-06,
+      "loss": 0.9211,
+      "step": 1814
+    },
+    {
+      "epoch": 0.9279141104294478,
+      "grad_norm": 3.243557929992676,
+      "learning_rate": 1.4172926922239315e-06,
+      "loss": 0.8936,
+      "step": 1815
+    },
+    {
+      "epoch": 0.9284253578732107,
+      "grad_norm": 2.932993173599243,
+      "learning_rate": 1.3973543748243e-06,
+      "loss": 0.8516,
+      "step": 1816
+    },
+    {
+      "epoch": 0.9289366053169734,
+      "grad_norm": 3.073636054992676,
+      "learning_rate": 1.377555310119405e-06,
+      "loss": 0.8922,
+      "step": 1817
+    },
+    {
+      "epoch": 0.9294478527607362,
+      "grad_norm": 3.3185694217681885,
+      "learning_rate": 1.3578955548360473e-06,
+      "loss": 0.8148,
+      "step": 1818
+    },
+    {
+      "epoch": 0.929959100204499,
+      "grad_norm": 3.1685848236083984,
+      "learning_rate": 1.3383751653019029e-06,
+      "loss": 0.9387,
+      "step": 1819
+    },
+    {
+      "epoch": 0.9304703476482618,
+      "grad_norm": 3.10244083404541,
+      "learning_rate": 1.31899419744535e-06,
+      "loss": 0.7719,
+      "step": 1820
+    },
+    {
+      "epoch": 0.9309815950920245,
+      "grad_norm": 3.1995668411254883,
+      "learning_rate": 1.2997527067952875e-06,
+      "loss": 0.9624,
+      "step": 1821
+    },
+    {
+      "epoch": 0.9314928425357873,
+      "grad_norm": 3.6962995529174805,
+      "learning_rate": 1.2806507484810215e-06,
+      "loss": 0.8737,
+      "step": 1822
+    },
+    {
+      "epoch": 0.9320040899795501,
+      "grad_norm": 3.9050567150115967,
+      "learning_rate": 1.2616883772320508e-06,
+      "loss": 1.0082,
+      "step": 1823
+    },
+    {
+      "epoch": 0.9325153374233128,
+      "grad_norm": 3.417041540145874,
+      "learning_rate": 1.2428656473779721e-06,
+      "loss": 0.8719,
+      "step": 1824
+    },
+    {
+      "epoch": 0.9330265848670757,
+      "grad_norm": 3.355666160583496,
+      "learning_rate": 1.2241826128482625e-06,
+      "loss": 0.9281,
+      "step": 1825
+    },
+    {
+      "epoch": 0.9335378323108384,
+      "grad_norm": 3.6375527381896973,
+      "learning_rate": 1.20563932717217e-06,
+      "loss": 0.8613,
+      "step": 1826
+    },
+    {
+      "epoch": 0.9340490797546013,
+      "grad_norm": 3.6801555156707764,
+      "learning_rate": 1.1872358434785346e-06,
+      "loss": 0.7958,
+      "step": 1827
+    },
+    {
+      "epoch": 0.934560327198364,
+      "grad_norm": 3.775987148284912,
+      "learning_rate": 1.1689722144956671e-06,
+      "loss": 0.8864,
+      "step": 1828
+    },
+    {
+      "epoch": 0.9350715746421268,
+      "grad_norm": 3.3887085914611816,
+      "learning_rate": 1.1508484925511542e-06,
+      "loss": 0.8428,
+      "step": 1829
+    },
+    {
+      "epoch": 0.9355828220858896,
+      "grad_norm": 3.701101064682007,
+      "learning_rate": 1.132864729571731e-06,
+      "loss": 0.8598,
+      "step": 1830
+    },
+    {
+      "epoch": 0.9360940695296524,
+      "grad_norm": 3.671170711517334,
+      "learning_rate": 1.1150209770831588e-06,
+      "loss": 0.8273,
+      "step": 1831
+    },
+    {
+      "epoch": 0.9366053169734151,
+      "grad_norm": 3.679232120513916,
+      "learning_rate": 1.0973172862100145e-06,
+      "loss": 0.899,
+      "step": 1832
+    },
+    {
+      "epoch": 0.9371165644171779,
+      "grad_norm": 3.9870572090148926,
+      "learning_rate": 1.0797537076756127e-06,
+      "loss": 0.9478,
+      "step": 1833
+    },
+    {
+      "epoch": 0.9376278118609407,
+      "grad_norm": 3.767942190170288,
+      "learning_rate": 1.0623302918018108e-06,
+      "loss": 0.8134,
+      "step": 1834
+    },
+    {
+      "epoch": 0.9381390593047034,
+      "grad_norm": 4.18915319442749,
+      "learning_rate": 1.0450470885088937e-06,
+      "loss": 0.9433,
+      "step": 1835
+    },
+    {
+      "epoch": 0.9386503067484663,
+      "grad_norm": 4.106919765472412,
+      "learning_rate": 1.0279041473154116e-06,
+      "loss": 0.8786,
+      "step": 1836
+    },
+    {
+      "epoch": 0.939161554192229,
+      "grad_norm": 4.100769519805908,
+      "learning_rate": 1.010901517338042e-06,
+      "loss": 0.8459,
+      "step": 1837
+    },
+    {
+      "epoch": 0.9396728016359919,
+      "grad_norm": 3.8171629905700684,
+      "learning_rate": 9.94039247291456e-07,
+      "loss": 0.7617,
+      "step": 1838
+    },
+    {
+      "epoch": 0.9401840490797546,
+      "grad_norm": 4.406624794006348,
+      "learning_rate": 9.773173854881913e-07,
+      "loss": 0.7923,
+      "step": 1839
+    },
+    {
+      "epoch": 0.9406952965235174,
+      "grad_norm": 4.20837926864624,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.9773,
+      "step": 1840
+    },
+    {
+      "epoch": 0.9412065439672802,
+      "grad_norm": 4.484842777252197,
+      "learning_rate": 9.442950778501325e-07,
+      "loss": 0.8711,
+      "step": 1841
+    },
+    {
+      "epoch": 0.941717791411043,
+      "grad_norm": 4.719925403594971,
+      "learning_rate": 9.279947266284061e-07,
+      "loss": 0.8392,
+      "step": 1842
+    },
+    {
+      "epoch": 0.9422290388548057,
+      "grad_norm": 4.476531982421875,
+      "learning_rate": 9.118349728758468e-07,
+      "loss": 0.767,
+      "step": 1843
+    },
+    {
+      "epoch": 0.9427402862985685,
+      "grad_norm": 4.48267126083374,
+      "learning_rate": 8.958158628922019e-07,
+      "loss": 0.6777,
+      "step": 1844
+    },
+    {
+      "epoch": 0.9432515337423313,
+      "grad_norm": 4.858860015869141,
+      "learning_rate": 8.799374425742246e-07,
+      "loss": 0.7469,
+      "step": 1845
+    },
+    {
+      "epoch": 0.943762781186094,
+      "grad_norm": 4.577967166900635,
+      "learning_rate": 8.641997574155846e-07,
+      "loss": 0.676,
+      "step": 1846
+    },
+    {
+      "epoch": 0.9442740286298569,
+      "grad_norm": 5.150705337524414,
+      "learning_rate": 8.486028525067358e-07,
+      "loss": 0.7106,
+      "step": 1847
+    },
+    {
+      "epoch": 0.9447852760736196,
+      "grad_norm": 4.760867595672607,
+      "learning_rate": 8.331467725347708e-07,
+      "loss": 0.7528,
+      "step": 1848
+    },
+    {
+      "epoch": 0.9452965235173824,
+      "grad_norm": 6.085768222808838,
+      "learning_rate": 8.178315617832999e-07,
+      "loss": 0.784,
+      "step": 1849
+    },
+    {
+      "epoch": 0.9458077709611452,
+      "grad_norm": 6.886128902435303,
+      "learning_rate": 8.026572641323393e-07,
+      "loss": 0.5051,
+      "step": 1850
+    },
+    {
+      "epoch": 0.946319018404908,
+      "grad_norm": 2.127772808074951,
+      "learning_rate": 7.876239230581506e-07,
+      "loss": 0.9763,
+      "step": 1851
+    },
+    {
+      "epoch": 0.9468302658486708,
+      "grad_norm": 2.3633391857147217,
+      "learning_rate": 7.727315816331515e-07,
+      "loss": 1.012,
+      "step": 1852
+    },
+    {
+      "epoch": 0.9473415132924335,
+      "grad_norm": 2.347792625427246,
+      "learning_rate": 7.579802825257775e-07,
+      "loss": 0.9064,
+      "step": 1853
+    },
+    {
+      "epoch": 0.9478527607361963,
+      "grad_norm": 2.4126203060150146,
+      "learning_rate": 7.43370068000343e-07,
+      "loss": 0.8778,
+      "step": 1854
+    },
+    {
+      "epoch": 0.9483640081799591,
+      "grad_norm": 2.6466588973999023,
+      "learning_rate": 7.289009799169688e-07,
+      "loss": 0.9782,
+      "step": 1855
+    },
+    {
+      "epoch": 0.9488752556237219,
+      "grad_norm": 2.6971752643585205,
+      "learning_rate": 7.145730597314049e-07,
+      "loss": 0.979,
+      "step": 1856
+    },
+    {
+      "epoch": 0.9493865030674846,
+      "grad_norm": 2.6623589992523193,
+      "learning_rate": 7.003863484949413e-07,
+      "loss": 0.9151,
+      "step": 1857
+    },
+    {
+      "epoch": 0.9498977505112475,
+      "grad_norm": 2.865541696548462,
+      "learning_rate": 6.86340886854292e-07,
+      "loss": 0.8949,
+      "step": 1858
+    },
+    {
+      "epoch": 0.9504089979550102,
+      "grad_norm": 3.0056710243225098,
+      "learning_rate": 6.724367150514777e-07,
+      "loss": 0.8936,
+      "step": 1859
+    },
+    {
+      "epoch": 0.950920245398773,
+      "grad_norm": 2.805217742919922,
+      "learning_rate": 6.58673872923693e-07,
+      "loss": 0.831,
+      "step": 1860
+    },
+    {
+      "epoch": 0.9514314928425358,
+      "grad_norm": 3.1351966857910156,
+      "learning_rate": 6.450523999032177e-07,
+      "loss": 1.0224,
+      "step": 1861
+    },
+    {
+      "epoch": 0.9519427402862985,
+      "grad_norm": 2.93005633354187,
+      "learning_rate": 6.315723350172775e-07,
+      "loss": 0.8546,
+      "step": 1862
+    },
+    {
+      "epoch": 0.9524539877300614,
+      "grad_norm": 3.181835651397705,
+      "learning_rate": 6.182337168879671e-07,
+      "loss": 0.903,
+      "step": 1863
+    },
+    {
+      "epoch": 0.9529652351738241,
+      "grad_norm": 3.401155948638916,
+      "learning_rate": 6.050365837320992e-07,
+      "loss": 0.9181,
+      "step": 1864
+    },
+    {
+      "epoch": 0.9534764826175869,
+      "grad_norm": 3.0578250885009766,
+      "learning_rate": 5.919809733611171e-07,
+      "loss": 0.8978,
+      "step": 1865
+    },
+    {
+      "epoch": 0.9539877300613497,
+      "grad_norm": 3.142280101776123,
+      "learning_rate": 5.790669231809875e-07,
+      "loss": 0.9351,
+      "step": 1866
+    },
+    {
+      "epoch": 0.9544989775051125,
+      "grad_norm": 3.1850273609161377,
+      "learning_rate": 5.66294470192097e-07,
+      "loss": 0.9621,
+      "step": 1867
+    },
+    {
+      "epoch": 0.9550102249488752,
+      "grad_norm": 3.274036169052124,
+      "learning_rate": 5.536636509891225e-07,
+      "loss": 0.8428,
+      "step": 1868
+    },
+    {
+      "epoch": 0.9555214723926381,
+      "grad_norm": 3.323293924331665,
+      "learning_rate": 5.411745017609493e-07,
+      "loss": 0.9623,
+      "step": 1869
+    },
+    {
+      "epoch": 0.9560327198364008,
+      "grad_norm": 3.296380043029785,
+      "learning_rate": 5.288270582905708e-07,
+      "loss": 0.86,
+      "step": 1870
+    },
+    {
+      "epoch": 0.9565439672801636,
+      "grad_norm": 3.7464077472686768,
+      "learning_rate": 5.166213559549549e-07,
+      "loss": 0.8665,
+      "step": 1871
+    },
+    {
+      "epoch": 0.9570552147239264,
+      "grad_norm": 3.5171990394592285,
+      "learning_rate": 5.045574297249833e-07,
+      "loss": 0.9063,
+      "step": 1872
+    },
+    {
+      "epoch": 0.9575664621676891,
+      "grad_norm": 3.684037685394287,
+      "learning_rate": 4.926353141653184e-07,
+      "loss": 0.9079,
+      "step": 1873
+    },
+    {
+      "epoch": 0.9580777096114519,
+      "grad_norm": 3.424814462661743,
+      "learning_rate": 4.80855043434325e-07,
+      "loss": 0.8508,
+      "step": 1874
+    },
+    {
+      "epoch": 0.9585889570552147,
+      "grad_norm": 3.3258156776428223,
+      "learning_rate": 4.692166512839491e-07,
+      "loss": 0.8615,
+      "step": 1875
+    },
+    {
+      "epoch": 0.9591002044989775,
+      "grad_norm": 3.7672901153564453,
+      "learning_rate": 4.577201710596612e-07,
+      "loss": 0.9087,
+      "step": 1876
+    },
+    {
+      "epoch": 0.9596114519427403,
+      "grad_norm": 3.638936996459961,
+      "learning_rate": 4.4636563570031873e-07,
+      "loss": 0.921,
+      "step": 1877
+    },
+    {
+      "epoch": 0.9601226993865031,
+      "grad_norm": 3.7481777667999268,
+      "learning_rate": 4.3515307773809855e-07,
+      "loss": 0.8998,
+      "step": 1878
+    },
+    {
+      "epoch": 0.9606339468302658,
+      "grad_norm": 4.0111212730407715,
+      "learning_rate": 4.240825292983808e-07,
+      "loss": 0.8379,
+      "step": 1879
+    },
+    {
+      "epoch": 0.9611451942740287,
+      "grad_norm": 3.8063735961914062,
+      "learning_rate": 4.131540220996877e-07,
+      "loss": 1.0216,
+      "step": 1880
+    },
+    {
+      "epoch": 0.9616564417177914,
+      "grad_norm": 3.818110942840576,
+      "learning_rate": 4.023675874535671e-07,
+      "loss": 0.9189,
+      "step": 1881
+    },
+    {
+      "epoch": 0.9621676891615542,
+      "grad_norm": 3.307727098464966,
+      "learning_rate": 3.917232562645035e-07,
+      "loss": 0.7723,
+      "step": 1882
+    },
+    {
+      "epoch": 0.962678936605317,
+      "grad_norm": 3.7070207595825195,
+      "learning_rate": 3.812210590298515e-07,
+      "loss": 0.8208,
+      "step": 1883
+    },
+    {
+      "epoch": 0.9631901840490797,
+      "grad_norm": 3.999943971633911,
+      "learning_rate": 3.7086102583972494e-07,
+      "loss": 0.9011,
+      "step": 1884
+    },
+    {
+      "epoch": 0.9637014314928425,
+      "grad_norm": 3.9105939865112305,
+      "learning_rate": 3.6064318637693e-07,
+      "loss": 0.9319,
+      "step": 1885
+    },
+    {
+      "epoch": 0.9642126789366053,
+      "grad_norm": 3.4953696727752686,
+      "learning_rate": 3.505675699168487e-07,
+      "loss": 0.836,
+      "step": 1886
+    },
+    {
+      "epoch": 0.9647239263803681,
+      "grad_norm": 4.094799995422363,
+      "learning_rate": 3.406342053274003e-07,
+      "loss": 0.7909,
+      "step": 1887
+    },
+    {
+      "epoch": 0.9652351738241309,
+      "grad_norm": 3.863297700881958,
+      "learning_rate": 3.3084312106892446e-07,
+      "loss": 0.8657,
+      "step": 1888
+    },
+    {
+      "epoch": 0.9657464212678937,
+      "grad_norm": 4.07185173034668,
+      "learning_rate": 3.211943451941035e-07,
+      "loss": 0.8233,
+      "step": 1889
+    },
+    {
+      "epoch": 0.9662576687116564,
+      "grad_norm": 4.35256814956665,
+      "learning_rate": 3.1168790534789605e-07,
+      "loss": 0.9713,
+      "step": 1890
+    },
+    {
+      "epoch": 0.9667689161554193,
+      "grad_norm": 3.9577109813690186,
+      "learning_rate": 3.023238287674479e-07,
+      "loss": 0.8875,
+      "step": 1891
+    },
+    {
+      "epoch": 0.967280163599182,
+      "grad_norm": 4.198614597320557,
+      "learning_rate": 2.9310214228202013e-07,
+      "loss": 0.7918,
+      "step": 1892
+    },
+    {
+      "epoch": 0.9677914110429447,
+      "grad_norm": 4.020992279052734,
+      "learning_rate": 2.840228723129001e-07,
+      "loss": 0.7969,
+      "step": 1893
+    },
+    {
+      "epoch": 0.9683026584867076,
+      "grad_norm": 4.857772350311279,
+      "learning_rate": 2.750860448733461e-07,
+      "loss": 0.7989,
+      "step": 1894
+    },
+    {
+      "epoch": 0.9688139059304703,
+      "grad_norm": 4.296475887298584,
+      "learning_rate": 2.662916855684816e-07,
+      "loss": 0.7896,
+      "step": 1895
+    },
+    {
+      "epoch": 0.9693251533742331,
+      "grad_norm": 4.845669269561768,
+      "learning_rate": 2.5763981959526786e-07,
+      "loss": 0.9473,
+      "step": 1896
+    },
+    {
+      "epoch": 0.9698364008179959,
+      "grad_norm": 5.276214599609375,
+      "learning_rate": 2.4913047174237035e-07,
+      "loss": 0.8389,
+      "step": 1897
+    },
+    {
+      "epoch": 0.9703476482617587,
+      "grad_norm": 5.026844501495361,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 0.6934,
+      "step": 1898
+    },
+    {
+      "epoch": 0.9708588957055214,
+      "grad_norm": 5.138829231262207,
+      "learning_rate": 2.3253942751056968e-07,
+      "loss": 0.5662,
+      "step": 1899
+    },
+    {
+      "epoch": 0.9713701431492843,
+      "grad_norm": 8.76285457611084,
+      "learning_rate": 2.2445777866709205e-07,
+      "loss": 0.8929,
+      "step": 1900
+    },
+    {
+      "epoch": 0.971881390593047,
+      "grad_norm": 2.1116087436676025,
+      "learning_rate": 2.1651874301465979e-07,
+      "loss": 0.8929,
+      "step": 1901
+    },
+    {
+      "epoch": 0.9723926380368099,
+      "grad_norm": 2.1844208240509033,
+      "learning_rate": 2.087223432996166e-07,
+      "loss": 0.8937,
+      "step": 1902
+    },
+    {
+      "epoch": 0.9729038854805726,
+      "grad_norm": 2.286012887954712,
+      "learning_rate": 2.0106860185962194e-07,
+      "loss": 0.8114,
+      "step": 1903
+    },
+    {
+      "epoch": 0.9734151329243353,
+      "grad_norm": 2.505859613418579,
+      "learning_rate": 1.935575406236123e-07,
+      "loss": 0.9787,
+      "step": 1904
+    },
+    {
+      "epoch": 0.9739263803680982,
+      "grad_norm": 2.708432674407959,
+      "learning_rate": 1.861891811117178e-07,
+      "loss": 0.9853,
+      "step": 1905
+    },
+    {
+      "epoch": 0.9744376278118609,
+      "grad_norm": 2.945042610168457,
+      "learning_rate": 1.7896354443521778e-07,
+      "loss": 0.9986,
+      "step": 1906
+    },
+    {
+      "epoch": 0.9749488752556237,
+      "grad_norm": 2.5554847717285156,
+      "learning_rate": 1.7188065129647435e-07,
+      "loss": 0.9626,
+      "step": 1907
+    },
+    {
+      "epoch": 0.9754601226993865,
+      "grad_norm": 2.838167190551758,
+      "learning_rate": 1.6494052198886555e-07,
+      "loss": 0.9229,
+      "step": 1908
+    },
+    {
+      "epoch": 0.9759713701431493,
+      "grad_norm": 2.6479218006134033,
+      "learning_rate": 1.5814317639673005e-07,
+      "loss": 0.8479,
+      "step": 1909
+    },
+    {
+      "epoch": 0.976482617586912,
+      "grad_norm": 3.0106728076934814,
+      "learning_rate": 1.5148863399532254e-07,
+      "loss": 0.8836,
+      "step": 1910
+    },
+    {
+      "epoch": 0.9769938650306749,
+      "grad_norm": 3.02433705329895,
+      "learning_rate": 1.4497691385074175e-07,
+      "loss": 0.9304,
+      "step": 1911
+    },
+    {
+      "epoch": 0.9775051124744376,
+      "grad_norm": 3.085106134414673,
+      "learning_rate": 1.3860803461989146e-07,
+      "loss": 0.8676,
+      "step": 1912
}
|
13395 |
],
|
13396 |
"logging_steps": 1,
|
|
|
13410 |
"attributes": {}
|
13411 |
}
|
13412 |
},
|
13413 |
+
"total_flos": 7.310480002095514e+17,
|
13414 |
"train_batch_size": 8,
|
13415 |
"trial_name": null,
|
13416 |
"trial_params": null
|
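Everything added to trainer_state.json above lands in its flat "log_history" list, one record per optimizer step, so inspecting the run needs only the standard library. A short sketch, assuming the checkpoint folder has been downloaded locally as last-checkpoint/:

```python
import json

# Summarize the per-step loss records shown in the diff above.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

records = [r for r in state["log_history"] if "loss" in r]
losses = [r["loss"] for r in records]
best = min(records, key=lambda r: r["loss"])
print(f"global_step={state['global_step']}  epoch={state['epoch']:.4f}")
print(f"final loss={losses[-1]}  best loss={best['loss']} at step {best['step']}")
```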