gabrielaltay committed
Commit c90ae0e
Parent(s): 294a57c
Training in progress, step 9099, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9557bc9f6cbee3ba0a056668f0757dc066d4f66def7e43b0358cae0ee9de6e67
 size 654946216
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e2e156708c40a75d36123ea5629230da104e836b70e9cff0007571ec0cbb6f6a
 size 1310000698
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f44032b1543a896a398ba18329beb5bf8b1f128caeeac11d091c47bd6a192c0e
 size 1064
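All three files above are stored through Git LFS, so each diff touches only the pointer file: the spec version line, the `oid sha256:` content hash, and the byte `size`. Below is a minimal sketch (not part of the commit) that re-derives the oid and size for locally downloaded copies of the checkpoint files; the local paths mirror the repo paths and are assumptions.

# Minimal verification sketch, assuming the three checkpoint files have been
# downloaded to paths matching the repo layout above.
import hashlib
import os

EXPECTED = {
    "last-checkpoint/model.safetensors":
        ("9557bc9f6cbee3ba0a056668f0757dc066d4f66def7e43b0358cae0ee9de6e67", 654946216),
    "last-checkpoint/optimizer.pt":
        ("e2e156708c40a75d36123ea5629230da104e836b70e9cff0007571ec0cbb6f6a", 1310000698),
    "last-checkpoint/scheduler.pt":
        ("f44032b1543a896a398ba18329beb5bf8b1f128caeeac11d091c47bd6a192c0e", 1064),
}

for path, (oid, size) in EXPECTED.items():
    # The LFS pointer's "size" is the raw byte count of the real file.
    assert os.path.getsize(path) == size, f"size mismatch for {path}"
    # The "oid" is the sha256 of the real file's contents; hash in 1 MiB chunks.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == oid, f"sha256 mismatch for {path}"
    print(f"{path}: OK")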
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9002671415850401,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 9099,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14161,6 +14161,1770 @@
       "learning_rate": 9.988127040664887e-06,
       "loss": 2.4894,
       "step": 8088
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6651523113250732,
+      "learning_rate": 9.968338775106362e-06,
+      "loss": 2.5608,
+      "step": 8092
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.5435396432876587,
+      "learning_rate": 9.948550509547838e-06,
+      "loss": 2.5702,
+      "step": 8096
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6652929782867432,
+      "learning_rate": 9.928762243989314e-06,
+      "loss": 2.5155,
+      "step": 8100
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.5997744798660278,
+      "learning_rate": 9.90897397843079e-06,
+      "loss": 2.412,
+      "step": 8104
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.518398404121399,
+      "learning_rate": 9.889185712872267e-06,
+      "loss": 2.4749,
+      "step": 8108
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6166468858718872,
+      "learning_rate": 9.869397447313745e-06,
+      "loss": 2.528,
+      "step": 8112
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.7324260473251343,
+      "learning_rate": 9.84960918175522e-06,
+      "loss": 2.4854,
+      "step": 8116
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6371716260910034,
+      "learning_rate": 9.829820916196696e-06,
+      "loss": 2.5037,
+      "step": 8120
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.711093544960022,
+      "learning_rate": 9.810032650638172e-06,
+      "loss": 2.6155,
+      "step": 8124
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6455191373825073,
+      "learning_rate": 9.790244385079648e-06,
+      "loss": 2.4078,
+      "step": 8128
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6366294622421265,
+      "learning_rate": 9.770456119521125e-06,
+      "loss": 2.4642,
+      "step": 8132
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.668990969657898,
+      "learning_rate": 9.7506678539626e-06,
+      "loss": 2.5074,
+      "step": 8136
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.5362697839736938,
+      "learning_rate": 9.730879588404076e-06,
+      "loss": 2.4896,
+      "step": 8140
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.8022223711013794,
+      "learning_rate": 9.711091322845554e-06,
+      "loss": 2.5353,
+      "step": 8144
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6878408193588257,
+      "learning_rate": 9.69130305728703e-06,
+      "loss": 2.5586,
+      "step": 8148
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.689186692237854,
+      "learning_rate": 9.671514791728506e-06,
+      "loss": 2.655,
+      "step": 8152
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6996088027954102,
+      "learning_rate": 9.651726526169981e-06,
+      "loss": 2.7126,
+      "step": 8156
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 2.8080925941467285,
+      "learning_rate": 9.631938260611457e-06,
+      "loss": 2.7099,
+      "step": 8160
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.7663508653640747,
+      "learning_rate": 9.612149995052934e-06,
+      "loss": 2.4897,
+      "step": 8164
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.7868987321853638,
+      "learning_rate": 9.59236172949441e-06,
+      "loss": 2.4844,
+      "step": 8168
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6591540575027466,
+      "learning_rate": 9.572573463935886e-06,
+      "loss": 2.4604,
+      "step": 8172
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.7677662372589111,
+      "learning_rate": 9.552785198377363e-06,
+      "loss": 2.5354,
+      "step": 8176
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6505751609802246,
+      "learning_rate": 9.532996932818839e-06,
+      "loss": 2.5834,
+      "step": 8180
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.7923264503479004,
+      "learning_rate": 9.513208667260315e-06,
+      "loss": 2.5445,
+      "step": 8184
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6527979373931885,
+      "learning_rate": 9.493420401701792e-06,
+      "loss": 2.3743,
+      "step": 8188
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.490796446800232,
+      "learning_rate": 9.473632136143268e-06,
+      "loss": 2.4511,
+      "step": 8192
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6742923259735107,
+      "learning_rate": 9.453843870584744e-06,
+      "loss": 2.5265,
+      "step": 8196
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6631675958633423,
+      "learning_rate": 9.434055605026219e-06,
+      "loss": 2.3355,
+      "step": 8200
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.9126391410827637,
+      "learning_rate": 9.414267339467695e-06,
+      "loss": 2.5204,
+      "step": 8204
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6832706928253174,
+      "learning_rate": 9.394479073909172e-06,
+      "loss": 2.541,
+      "step": 8208
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.8622807264328003,
+      "learning_rate": 9.37469080835065e-06,
+      "loss": 2.6101,
+      "step": 8212
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.7272640466690063,
+      "learning_rate": 9.354902542792126e-06,
+      "loss": 2.7245,
+      "step": 8216
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6699191331863403,
+      "learning_rate": 9.3351142772336e-06,
+      "loss": 2.4707,
+      "step": 8220
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.599859595298767,
+      "learning_rate": 9.315326011675077e-06,
+      "loss": 2.5186,
+      "step": 8224
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6403051614761353,
+      "learning_rate": 9.295537746116553e-06,
+      "loss": 2.6446,
+      "step": 8228
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.6128116846084595,
+      "learning_rate": 9.27574948055803e-06,
+      "loss": 2.6126,
+      "step": 8232
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 1.588544487953186,
+      "learning_rate": 9.255961214999506e-06,
+      "loss": 2.6034,
+      "step": 8236
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.7381192445755005,
+      "learning_rate": 9.236172949440982e-06,
+      "loss": 2.6789,
+      "step": 8240
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.6580698490142822,
+      "learning_rate": 9.216384683882458e-06,
+      "loss": 2.6275,
+      "step": 8244
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.779470443725586,
+      "learning_rate": 9.196596418323935e-06,
+      "loss": 2.6094,
+      "step": 8248
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.6793197393417358,
+      "learning_rate": 9.176808152765411e-06,
+      "loss": 2.3446,
+      "step": 8252
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.7751526832580566,
+      "learning_rate": 9.157019887206887e-06,
+      "loss": 2.5427,
+      "step": 8256
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.687898874282837,
+      "learning_rate": 9.137231621648364e-06,
+      "loss": 2.5313,
+      "step": 8260
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.5011862516403198,
+      "learning_rate": 9.117443356089838e-06,
+      "loss": 2.4652,
+      "step": 8264
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.7039753198623657,
+      "learning_rate": 9.097655090531315e-06,
+      "loss": 2.5125,
+      "step": 8268
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.7907326221466064,
+      "learning_rate": 9.077866824972791e-06,
+      "loss": 2.4032,
+      "step": 8272
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.689347267150879,
+      "learning_rate": 9.058078559414267e-06,
+      "loss": 2.4636,
+      "step": 8276
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.7220097780227661,
+      "learning_rate": 9.038290293855745e-06,
+      "loss": 2.4757,
+      "step": 8280
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.8284951448440552,
+      "learning_rate": 9.01850202829722e-06,
+      "loss": 2.473,
+      "step": 8284
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.7803459167480469,
+      "learning_rate": 8.998713762738696e-06,
+      "loss": 2.4781,
+      "step": 8288
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.6893064975738525,
+      "learning_rate": 8.978925497180173e-06,
+      "loss": 2.5115,
+      "step": 8292
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.8316203355789185,
+      "learning_rate": 8.959137231621649e-06,
+      "loss": 2.5759,
+      "step": 8296
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.6714107990264893,
+      "learning_rate": 8.939348966063125e-06,
+      "loss": 2.5676,
+      "step": 8300
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.5850313901901245,
+      "learning_rate": 8.9195607005046e-06,
+      "loss": 2.3758,
+      "step": 8304
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.595004677772522,
+      "learning_rate": 8.899772434946078e-06,
+      "loss": 2.4782,
+      "step": 8308
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.756579041481018,
+      "learning_rate": 8.879984169387554e-06,
+      "loss": 2.4742,
+      "step": 8312
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.601770043373108,
+      "learning_rate": 8.86019590382903e-06,
+      "loss": 2.3767,
+      "step": 8316
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.8463127613067627,
+      "learning_rate": 8.840407638270507e-06,
+      "loss": 2.5466,
+      "step": 8320
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.8509941101074219,
+      "learning_rate": 8.820619372711983e-06,
+      "loss": 2.606,
+      "step": 8324
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.6294124126434326,
+      "learning_rate": 8.800831107153458e-06,
+      "loss": 2.4114,
+      "step": 8328
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.5632402896881104,
+      "learning_rate": 8.781042841594934e-06,
+      "loss": 2.3389,
+      "step": 8332
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 1.602977991104126,
+      "learning_rate": 8.76125457603641e-06,
+      "loss": 2.5443,
+      "step": 8336
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.8994452953338623,
+      "learning_rate": 8.741466310477887e-06,
+      "loss": 2.4973,
+      "step": 8340
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.703715205192566,
+      "learning_rate": 8.721678044919363e-06,
+      "loss": 2.3838,
+      "step": 8344
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.7744590044021606,
+      "learning_rate": 8.70188977936084e-06,
+      "loss": 2.2877,
+      "step": 8348
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.7415697574615479,
+      "learning_rate": 8.682101513802316e-06,
+      "loss": 2.5678,
+      "step": 8352
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.6912627220153809,
+      "learning_rate": 8.662313248243792e-06,
+      "loss": 2.3746,
+      "step": 8356
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.591155767440796,
+      "learning_rate": 8.642524982685268e-06,
+      "loss": 2.4637,
+      "step": 8360
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.625097393989563,
+      "learning_rate": 8.622736717126745e-06,
+      "loss": 2.4071,
+      "step": 8364
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.7765227556228638,
+      "learning_rate": 8.60294845156822e-06,
+      "loss": 2.4339,
+      "step": 8368
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.7714086771011353,
+      "learning_rate": 8.583160186009696e-06,
+      "loss": 2.4657,
+      "step": 8372
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.5393376350402832,
+      "learning_rate": 8.563371920451174e-06,
+      "loss": 2.4295,
+      "step": 8376
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.6363029479980469,
+      "learning_rate": 8.54358365489265e-06,
+      "loss": 2.3262,
+      "step": 8380
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.6590903997421265,
+      "learning_rate": 8.523795389334126e-06,
+      "loss": 2.5488,
+      "step": 8384
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 2.057309150695801,
+      "learning_rate": 8.504007123775601e-06,
+      "loss": 2.4571,
+      "step": 8388
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.643871784210205,
+      "learning_rate": 8.484218858217077e-06,
+      "loss": 2.5755,
+      "step": 8392
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.5784872770309448,
+      "learning_rate": 8.464430592658554e-06,
+      "loss": 2.3636,
+      "step": 8396
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.5051548480987549,
+      "learning_rate": 8.44464232710003e-06,
+      "loss": 2.3239,
+      "step": 8400
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.7668542861938477,
+      "learning_rate": 8.424854061541506e-06,
+      "loss": 2.4491,
+      "step": 8404
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.7257317304611206,
+      "learning_rate": 8.405065795982982e-06,
+      "loss": 2.4014,
+      "step": 8408
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.6491328477859497,
+      "learning_rate": 8.385277530424459e-06,
+      "loss": 2.5093,
+      "step": 8412
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.5918080806732178,
+      "learning_rate": 8.365489264865935e-06,
+      "loss": 2.7333,
+      "step": 8416
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.709262490272522,
+      "learning_rate": 8.345700999307411e-06,
+      "loss": 2.3297,
+      "step": 8420
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.5516706705093384,
+      "learning_rate": 8.325912733748888e-06,
+      "loss": 2.3839,
+      "step": 8424
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.711029291152954,
+      "learning_rate": 8.306124468190364e-06,
+      "loss": 2.4985,
+      "step": 8428
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.5779407024383545,
+      "learning_rate": 8.286336202631839e-06,
+      "loss": 2.4944,
+      "step": 8432
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 1.8659144639968872,
+      "learning_rate": 8.266547937073315e-06,
+      "loss": 2.4244,
+      "step": 8436
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.93182373046875,
+      "learning_rate": 8.246759671514791e-06,
+      "loss": 2.4782,
+      "step": 8440
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.5297750234603882,
+      "learning_rate": 8.22697140595627e-06,
+      "loss": 2.5646,
+      "step": 8444
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7578099966049194,
+      "learning_rate": 8.207183140397746e-06,
+      "loss": 2.5986,
+      "step": 8448
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7178421020507812,
+      "learning_rate": 8.18739487483922e-06,
+      "loss": 2.4336,
+      "step": 8452
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.6756671667099,
+      "learning_rate": 8.167606609280697e-06,
+      "loss": 2.5326,
+      "step": 8456
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.681978464126587,
+      "learning_rate": 8.147818343722173e-06,
+      "loss": 2.4505,
+      "step": 8460
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.777003288269043,
+      "learning_rate": 8.12803007816365e-06,
+      "loss": 2.4595,
+      "step": 8464
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7001906633377075,
+      "learning_rate": 8.108241812605126e-06,
+      "loss": 2.3165,
+      "step": 8468
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.6168255805969238,
+      "learning_rate": 8.0884535470466e-06,
+      "loss": 2.4513,
+      "step": 8472
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7238038778305054,
+      "learning_rate": 8.068665281488078e-06,
+      "loss": 2.5386,
+      "step": 8476
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7556039094924927,
+      "learning_rate": 8.048877015929555e-06,
+      "loss": 2.5683,
+      "step": 8480
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.8705509901046753,
+      "learning_rate": 8.029088750371031e-06,
+      "loss": 2.411,
+      "step": 8484
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.5241179466247559,
+      "learning_rate": 8.009300484812507e-06,
+      "loss": 2.424,
+      "step": 8488
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.6394929885864258,
+      "learning_rate": 7.989512219253984e-06,
+      "loss": 2.4855,
+      "step": 8492
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7148469686508179,
+      "learning_rate": 7.969723953695458e-06,
+      "loss": 2.4967,
+      "step": 8496
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7260617017745972,
+      "learning_rate": 7.949935688136934e-06,
+      "loss": 2.5231,
+      "step": 8500
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.84506356716156,
+      "learning_rate": 7.93014742257841e-06,
+      "loss": 2.3639,
+      "step": 8504
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.5255876779556274,
+      "learning_rate": 7.910359157019887e-06,
+      "loss": 2.4189,
+      "step": 8508
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7980495691299438,
+      "learning_rate": 7.890570891461365e-06,
+      "loss": 2.4907,
+      "step": 8512
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7311155796051025,
+      "learning_rate": 7.87078262590284e-06,
+      "loss": 2.5981,
+      "step": 8516
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.6992567777633667,
+      "learning_rate": 7.850994360344316e-06,
+      "loss": 2.5617,
+      "step": 8520
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.6677074432373047,
+      "learning_rate": 7.831206094785792e-06,
+      "loss": 2.5869,
+      "step": 8524
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.717405080795288,
+      "learning_rate": 7.811417829227269e-06,
+      "loss": 2.534,
+      "step": 8528
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7169833183288574,
+      "learning_rate": 7.791629563668745e-06,
+      "loss": 2.5644,
+      "step": 8532
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.6315467357635498,
+      "learning_rate": 7.77184129811022e-06,
+      "loss": 2.5054,
+      "step": 8536
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 1.7004671096801758,
+      "learning_rate": 7.752053032551696e-06,
+      "loss": 2.3946,
+      "step": 8540
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.7185847759246826,
+      "learning_rate": 7.732264766993174e-06,
+      "loss": 2.6361,
+      "step": 8544
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.7826788425445557,
+      "learning_rate": 7.71247650143465e-06,
+      "loss": 2.4693,
+      "step": 8548
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.5494498014450073,
+      "learning_rate": 7.692688235876127e-06,
+      "loss": 2.475,
+      "step": 8552
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6112264394760132,
+      "learning_rate": 7.672899970317603e-06,
+      "loss": 2.685,
+      "step": 8556
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.583173394203186,
+      "learning_rate": 7.653111704759078e-06,
+      "loss": 2.4246,
+      "step": 8560
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6651097536087036,
+      "learning_rate": 7.633323439200554e-06,
+      "loss": 2.4226,
+      "step": 8564
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6389166116714478,
+      "learning_rate": 7.613535173642031e-06,
+      "loss": 2.4979,
+      "step": 8568
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.7426066398620605,
+      "learning_rate": 7.5937469080835074e-06,
+      "loss": 2.5513,
+      "step": 8572
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6744520664215088,
+      "learning_rate": 7.573958642524984e-06,
+      "loss": 2.4503,
+      "step": 8576
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.7158594131469727,
+      "learning_rate": 7.554170376966458e-06,
+      "loss": 2.4517,
+      "step": 8580
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6604433059692383,
+      "learning_rate": 7.5343821114079355e-06,
+      "loss": 2.4247,
+      "step": 8584
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.636860728263855,
+      "learning_rate": 7.514593845849412e-06,
+      "loss": 2.2244,
+      "step": 8588
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6336443424224854,
+      "learning_rate": 7.494805580290888e-06,
+      "loss": 2.5156,
+      "step": 8592
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.8386069536209106,
+      "learning_rate": 7.4750173147323645e-06,
+      "loss": 2.4954,
+      "step": 8596
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6216977834701538,
+      "learning_rate": 7.45522904917384e-06,
+      "loss": 2.4894,
+      "step": 8600
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.7700227499008179,
+      "learning_rate": 7.435440783615316e-06,
+      "loss": 2.6143,
+      "step": 8604
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6691062450408936,
+      "learning_rate": 7.415652518056793e-06,
+      "loss": 2.4329,
+      "step": 8608
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.517760992050171,
+      "learning_rate": 7.395864252498269e-06,
+      "loss": 2.402,
+      "step": 8612
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.5939360857009888,
+      "learning_rate": 7.376075986939745e-06,
+      "loss": 2.4902,
+      "step": 8616
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.6323703527450562,
+      "learning_rate": 7.356287721381221e-06,
+      "loss": 2.6149,
+      "step": 8620
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.5932726860046387,
+      "learning_rate": 7.336499455822697e-06,
+      "loss": 2.4341,
+      "step": 8624
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.7558265924453735,
+      "learning_rate": 7.316711190264173e-06,
+      "loss": 2.6402,
+      "step": 8628
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.7289772033691406,
+      "learning_rate": 7.29692292470565e-06,
+      "loss": 2.3923,
+      "step": 8632
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.800723910331726,
+      "learning_rate": 7.277134659147127e-06,
+      "loss": 2.4707,
+      "step": 8636
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.672587275505066,
+      "learning_rate": 7.257346393588603e-06,
+      "loss": 2.526,
+      "step": 8640
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.686955451965332,
+      "learning_rate": 7.237558128030078e-06,
+      "loss": 2.4433,
+      "step": 8644
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.5995945930480957,
+      "learning_rate": 7.217769862471554e-06,
+      "loss": 2.609,
+      "step": 8648
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.6582813262939453,
+      "learning_rate": 7.197981596913031e-06,
+      "loss": 2.3675,
+      "step": 8652
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.6642727851867676,
+      "learning_rate": 7.178193331354508e-06,
+      "loss": 2.4762,
+      "step": 8656
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.6755313873291016,
+      "learning_rate": 7.158405065795984e-06,
+      "loss": 2.4811,
+      "step": 8660
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.787563681602478,
+      "learning_rate": 7.138616800237459e-06,
+      "loss": 2.4481,
+      "step": 8664
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.5694202184677124,
+      "learning_rate": 7.118828534678936e-06,
+      "loss": 2.44,
+      "step": 8668
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.6753268241882324,
+      "learning_rate": 7.099040269120412e-06,
+      "loss": 2.664,
+      "step": 8672
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.8283534049987793,
+      "learning_rate": 7.079252003561888e-06,
+      "loss": 2.4761,
+      "step": 8676
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.730433702468872,
+      "learning_rate": 7.059463738003365e-06,
+      "loss": 2.5623,
+      "step": 8680
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7397688627243042,
+      "learning_rate": 7.03967547244484e-06,
+      "loss": 2.6005,
+      "step": 8684
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7049192190170288,
+      "learning_rate": 7.0198872068863165e-06,
+      "loss": 2.4976,
+      "step": 8688
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7921533584594727,
+      "learning_rate": 7.000098941327793e-06,
+      "loss": 2.5239,
+      "step": 8692
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7059839963912964,
+      "learning_rate": 6.980310675769269e-06,
+      "loss": 2.403,
+      "step": 8696
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.636533260345459,
+      "learning_rate": 6.9605224102107455e-06,
+      "loss": 2.4023,
+      "step": 8700
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.6458524465560913,
+      "learning_rate": 6.940734144652221e-06,
+      "loss": 2.6021,
+      "step": 8704
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.8312097787857056,
+      "learning_rate": 6.920945879093697e-06,
+      "loss": 2.4073,
+      "step": 8708
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.6643646955490112,
+      "learning_rate": 6.901157613535174e-06,
+      "loss": 2.4315,
+      "step": 8712
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7501451969146729,
+      "learning_rate": 6.88136934797665e-06,
+      "loss": 2.4771,
+      "step": 8716
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.6172442436218262,
+      "learning_rate": 6.861581082418127e-06,
+      "loss": 2.4944,
+      "step": 8720
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7708154916763306,
+      "learning_rate": 6.841792816859603e-06,
+      "loss": 2.5976,
+      "step": 8724
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.640822410583496,
+      "learning_rate": 6.822004551301078e-06,
+      "loss": 2.4854,
+      "step": 8728
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7448416948318481,
+      "learning_rate": 6.802216285742554e-06,
+      "loss": 2.5023,
+      "step": 8732
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7678117752075195,
+      "learning_rate": 6.7824280201840315e-06,
+      "loss": 2.3739,
+      "step": 8736
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 1.7742412090301514,
+      "learning_rate": 6.762639754625508e-06,
+      "loss": 2.5075,
+      "step": 8740
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6153099536895752,
+      "learning_rate": 6.742851489066984e-06,
+      "loss": 2.3971,
+      "step": 8744
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 2.1363136768341064,
+      "learning_rate": 6.72306322350846e-06,
+      "loss": 2.5768,
+      "step": 8748
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.694061279296875,
+      "learning_rate": 6.703274957949936e-06,
+      "loss": 2.4628,
+      "step": 8752
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.5482895374298096,
+      "learning_rate": 6.683486692391412e-06,
+      "loss": 2.3695,
+      "step": 8756
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6727862358093262,
+      "learning_rate": 6.663698426832889e-06,
+      "loss": 2.5452,
+      "step": 8760
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 2.0361154079437256,
+      "learning_rate": 6.643910161274365e-06,
+      "loss": 2.7449,
+      "step": 8764
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6247440576553345,
+      "learning_rate": 6.62412189571584e-06,
+      "loss": 2.5772,
+      "step": 8768
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6015691757202148,
+      "learning_rate": 6.604333630157317e-06,
+      "loss": 2.5729,
+      "step": 8772
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.7053395509719849,
+      "learning_rate": 6.584545364598793e-06,
+      "loss": 2.541,
+      "step": 8776
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6078673601150513,
+      "learning_rate": 6.564757099040269e-06,
+      "loss": 2.5443,
+      "step": 8780
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6850202083587646,
+      "learning_rate": 6.544968833481746e-06,
+      "loss": 2.6706,
+      "step": 8784
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6219288110733032,
+      "learning_rate": 6.525180567923223e-06,
+      "loss": 2.5243,
+      "step": 8788
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.8155920505523682,
+      "learning_rate": 6.5053923023646975e-06,
+      "loss": 2.5544,
+      "step": 8792
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.7686444520950317,
+      "learning_rate": 6.485604036806174e-06,
+      "loss": 2.3905,
+      "step": 8796
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.9808334112167358,
+      "learning_rate": 6.46581577124765e-06,
+      "loss": 2.5068,
+      "step": 8800
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.7255302667617798,
+      "learning_rate": 6.446027505689127e-06,
+      "loss": 2.4938,
+      "step": 8804
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6764934062957764,
+      "learning_rate": 6.426239240130604e-06,
+      "loss": 2.3194,
+      "step": 8808
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.5869959592819214,
+      "learning_rate": 6.406450974572078e-06,
+      "loss": 2.4587,
+      "step": 8812
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.5677440166473389,
+      "learning_rate": 6.386662709013555e-06,
+      "loss": 2.4172,
+      "step": 8816
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6823347806930542,
+      "learning_rate": 6.366874443455032e-06,
+      "loss": 2.4559,
+      "step": 8820
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6375516653060913,
+      "learning_rate": 6.347086177896508e-06,
+      "loss": 2.5198,
+      "step": 8824
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.629103422164917,
+      "learning_rate": 6.327297912337984e-06,
+      "loss": 2.5148,
+      "step": 8828
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.5278247594833374,
+      "learning_rate": 6.30750964677946e-06,
+      "loss": 2.4411,
+      "step": 8832
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.8897967338562012,
+      "learning_rate": 6.287721381220936e-06,
+      "loss": 2.4329,
+      "step": 8836
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.6970409154891968,
+      "learning_rate": 6.2679331156624125e-06,
+      "loss": 2.7446,
+      "step": 8840
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.5253138542175293,
+      "learning_rate": 6.248144850103889e-06,
+      "loss": 2.4237,
+      "step": 8844
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6197317838668823,
+      "learning_rate": 6.228356584545364e-06,
+      "loss": 2.5055,
+      "step": 8848
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.5637788772583008,
+      "learning_rate": 6.2085683189868415e-06,
+      "loss": 2.3537,
+      "step": 8852
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.662288784980774,
+      "learning_rate": 6.188780053428318e-06,
+      "loss": 2.3261,
+      "step": 8856
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.642146348953247,
+      "learning_rate": 6.168991787869793e-06,
+      "loss": 2.4903,
+      "step": 8860
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6560384035110474,
+      "learning_rate": 6.1492035223112696e-06,
+      "loss": 2.5563,
+      "step": 8864
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.7499784231185913,
+      "learning_rate": 6.129415256752746e-06,
+      "loss": 2.5078,
+      "step": 8868
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.8147586584091187,
+      "learning_rate": 6.109626991194222e-06,
+      "loss": 2.3585,
+      "step": 8872
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6541246175765991,
+      "learning_rate": 6.0898387256356985e-06,
+      "loss": 2.5436,
+      "step": 8876
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.8309890031814575,
+      "learning_rate": 6.070050460077174e-06,
+      "loss": 2.6095,
+      "step": 8880
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.725372552871704,
+      "learning_rate": 6.050262194518651e-06,
+      "loss": 2.5652,
+      "step": 8884
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6648577451705933,
+      "learning_rate": 6.0304739289601275e-06,
+      "loss": 2.4624,
+      "step": 8888
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6224662065505981,
+      "learning_rate": 6.010685663401603e-06,
+      "loss": 2.4616,
+      "step": 8892
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.535581111907959,
+      "learning_rate": 5.990897397843079e-06,
+      "loss": 2.3335,
+      "step": 8896
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.5054175853729248,
+      "learning_rate": 5.971109132284556e-06,
+      "loss": 2.3097,
+      "step": 8900
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.7295256853103638,
+      "learning_rate": 5.951320866726032e-06,
+      "loss": 2.538,
+      "step": 8904
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6823656558990479,
+      "learning_rate": 5.931532601167508e-06,
+      "loss": 2.4051,
+      "step": 8908
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.702989935874939,
+      "learning_rate": 5.911744335608984e-06,
+      "loss": 2.3926,
+      "step": 8912
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6187750101089478,
+      "learning_rate": 5.89195607005046e-06,
+      "loss": 2.2627,
+      "step": 8916
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6742513179779053,
+      "learning_rate": 5.872167804491937e-06,
+      "loss": 2.3604,
+      "step": 8920
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.7713173627853394,
+      "learning_rate": 5.852379538933413e-06,
+      "loss": 2.557,
+      "step": 8924
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.624006748199463,
+      "learning_rate": 5.832591273374889e-06,
+      "loss": 2.4546,
+      "step": 8928
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.8159652948379517,
+      "learning_rate": 5.812803007816365e-06,
+      "loss": 2.4153,
+      "step": 8932
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6506266593933105,
+      "learning_rate": 5.793014742257842e-06,
+      "loss": 2.6401,
+      "step": 8936
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.7533698081970215,
+      "learning_rate": 5.773226476699318e-06,
+      "loss": 2.5884,
+      "step": 8940
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.6324323415756226,
+      "learning_rate": 5.7534382111407935e-06,
+      "loss": 2.2965,
+      "step": 8944
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.5913504362106323,
+      "learning_rate": 5.73364994558227e-06,
+      "loss": 2.4982,
+      "step": 8948
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.9502431154251099,
+      "learning_rate": 5.713861680023746e-06,
+      "loss": 2.3815,
+      "step": 8952
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.5532846450805664,
+      "learning_rate": 5.694073414465222e-06,
+      "loss": 2.4715,
+      "step": 8956
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6778051853179932,
+      "learning_rate": 5.674285148906699e-06,
+      "loss": 2.2548,
+      "step": 8960
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.5361812114715576,
+      "learning_rate": 5.654496883348174e-06,
+      "loss": 2.4623,
+      "step": 8964
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6158546209335327,
+      "learning_rate": 5.634708617789651e-06,
+      "loss": 2.4732,
+      "step": 8968
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.7343107461929321,
+      "learning_rate": 5.614920352231128e-06,
+      "loss": 2.4489,
+      "step": 8972
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6918219327926636,
+      "learning_rate": 5.595132086672603e-06,
+      "loss": 2.4923,
+      "step": 8976
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.725468397140503,
+      "learning_rate": 5.5753438211140795e-06,
+      "loss": 2.5126,
+      "step": 8980
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6052392721176147,
+      "learning_rate": 5.555555555555556e-06,
+      "loss": 2.3489,
+      "step": 8984
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6850762367248535,
+      "learning_rate": 5.535767289997032e-06,
+      "loss": 2.5302,
+      "step": 8988
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 2.009579658508301,
+      "learning_rate": 5.5159790244385085e-06,
+      "loss": 2.4038,
+      "step": 8992
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.787927269935608,
+      "learning_rate": 5.496190758879984e-06,
+      "loss": 2.4092,
+      "step": 8996
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.583129644393921,
+      "learning_rate": 5.476402493321461e-06,
+      "loss": 2.5511,
+      "step": 9000
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.5953203439712524,
+      "learning_rate": 5.4566142277629374e-06,
+      "loss": 2.3661,
+      "step": 9004
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6569699048995972,
+      "learning_rate": 5.436825962204413e-06,
+      "loss": 2.3866,
+      "step": 9008
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.5289307832717896,
+      "learning_rate": 5.417037696645889e-06,
+      "loss": 2.4527,
+      "step": 9012
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.5741091966629028,
+      "learning_rate": 5.3972494310873655e-06,
+      "loss": 2.5182,
+      "step": 9016
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.672529697418213,
+      "learning_rate": 5.377461165528842e-06,
+      "loss": 2.4246,
+      "step": 9020
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.5960274934768677,
+      "learning_rate": 5.357672899970318e-06,
+      "loss": 2.3121,
+      "step": 9024
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.9494187831878662,
+      "learning_rate": 5.337884634411794e-06,
+      "loss": 2.3272,
+      "step": 9028
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.8283107280731201,
+      "learning_rate": 5.31809636885327e-06,
+      "loss": 2.4936,
+      "step": 9032
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6064289808273315,
+      "learning_rate": 5.298308103294747e-06,
+      "loss": 2.5352,
+      "step": 9036
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.6828787326812744,
+      "learning_rate": 5.278519837736223e-06,
+      "loss": 2.2623,
+      "step": 9040
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 1.93602454662323,
+      "learning_rate": 5.258731572177699e-06,
+      "loss": 2.4931,
+      "step": 9044
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.6569011211395264,
+      "learning_rate": 5.238943306619174e-06,
+      "loss": 2.4178,
+      "step": 9048
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.6328504085540771,
+      "learning_rate": 5.219155041060652e-06,
+      "loss": 2.6022,
+      "step": 9052
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.7538514137268066,
+      "learning_rate": 5.199366775502128e-06,
+      "loss": 2.3894,
+      "step": 9056
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.7447439432144165,
+      "learning_rate": 5.179578509943603e-06,
+      "loss": 2.3315,
+      "step": 9060
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.793074607849121,
+      "learning_rate": 5.15979024438508e-06,
+      "loss": 2.4952,
+      "step": 9064
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.6562350988388062,
+      "learning_rate": 5.140001978826556e-06,
+      "loss": 2.3996,
+      "step": 9068
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.7558027505874634,
+      "learning_rate": 5.120213713268032e-06,
+      "loss": 2.4778,
+      "step": 9072
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.6544588804244995,
+      "learning_rate": 5.100425447709509e-06,
+      "loss": 2.5229,
+      "step": 9076
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.775094985961914,
+      "learning_rate": 5.080637182150984e-06,
+      "loss": 2.3536,
+      "step": 9080
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.9515008926391602,
+      "learning_rate": 5.060848916592461e-06,
+      "loss": 2.4411,
+      "step": 9084
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.8084925413131714,
+      "learning_rate": 5.041060651033938e-06,
+      "loss": 2.3756,
+      "step": 9088
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.6303074359893799,
+      "learning_rate": 5.021272385475413e-06,
+      "loss": 2.3407,
+      "step": 9092
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.6556874513626099,
+      "learning_rate": 5.0014841199168894e-06,
+      "loss": 2.4766,
+      "step": 9096
     }
   ],
   "logging_steps": 4,
@@ -14168,7 +15932,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 1011,
-  "total_flos":
+  "total_flos": 1.1128110498761933e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
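The updated trainer_state.json records the resume point: global_step 9099 is the ninth periodic save with save_steps = 1011 (9 * 1011 = 9099), the epoch value 0.9002671415850401 implies roughly 10107 optimizer steps in the full single epoch, and log_history gains one record every logging_steps = 4 steps. Below is a minimal sketch (not part of the commit) for loading the file and checking those relationships; the local path is an assumption.

# Minimal inspection sketch, assuming trainer_state.json has been downloaded
# from the checkpoint directory shown in this commit.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# 9099 = 9 * 1011, i.e. this checkpoint is the ninth periodic save.
assert state["global_step"] == 9 * state["save_steps"]

# epoch ~= global_step / steps_per_epoch, so inverting recovers the total.
steps_per_epoch = round(state["global_step"] / state["epoch"])
print("implied steps per epoch:", steps_per_epoch)  # ~10107

# One log record every 4 optimizer steps; the newest record is step 9096.
last = state["log_history"][-1]
print(last["step"], last["loss"], last["learning_rate"])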