gabrielaltay committed
Commit ceff349
1 Parent(s): 85d8d99
Training in progress, step 8088, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bad36090d3fea6bf5c95ba42db0a60c3eebb874b00583f59320f54010495d068
 size 654946216
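The large checkpoint files in this commit are stored through Git LFS, so the diff above only touches the three-line pointer file: the spec version, the SHA-256 of the blob, and its size in bytes. As a minimal sketch (assuming the blob has already been downloaded; the `pointer_path`/`data_path` names and the helper functions are illustrative, not part of this repo), a local copy can be checked against its pointer like this:

```python
import hashlib
from pathlib import Path

def read_lfs_pointer(pointer_path: str) -> dict:
    """Parse a git-lfs pointer file into {'version', 'oid', 'size'}."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_blob(pointer_path: str, data_path: str) -> bool:
    """Check a downloaded blob's size and SHA-256 against its LFS pointer."""
    pointer = read_lfs_pointer(pointer_path)
    blob = Path(data_path)
    if blob.stat().st_size != pointer["size"]:
        return False
    digest = hashlib.sha256()
    with blob.open("rb") as f:
        # Hash in 1 MiB chunks to avoid loading a ~655 MB file into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == pointer["oid"]

# e.g. verify_blob("model.safetensors.pointer", "model.safetensors")
```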
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6081f069ca372d13b29b3847db2ea0377d5705d67197467dc5eb4bba64d27645
 size 1310000698
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:57e93e92bbbccd397547149085c6875813b56fd6bea353a98cf0179f1892adb6
 size 1064
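Together these files make the checkpoint resumable: weights in model.safetensors, optimizer state in optimizer.pt, and the LR scheduler state in scheduler.pt. Note that optimizer.pt (1,310,000,698 bytes) is almost exactly twice model.safetensors (654,946,216 bytes), which would be consistent with an Adam-style optimizer keeping two moment buffers per parameter; that reading is an inference from the sizes, not something this commit states. A minimal sketch for peeking at the serialized state (assuming PyTorch and the standard `Trainer` checkpoint layout; paths are relative to this repo):

```python
import torch

# Optimizer state as saved by torch.save(optimizer.state_dict(), ...):
# typically a dict with 'state' (per-parameter tensors) and 'param_groups'.
opt_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
print(opt_state.keys())
print(len(opt_state["state"]), "parameter slots")

# Scheduler state is a small plain dict, matching the 1,064-byte file.
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")
print(sched_state)
```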
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.8002374591867023,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 8088,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12390,6 +12390,1777 @@
       "learning_rate": 1.4994558226971406e-05,
       "loss": 2.4362,
       "step": 7076
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.5943050384521484,
+      "learning_rate": 1.4974769961412883e-05,
+      "loss": 2.5309,
+      "step": 7080
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.7004754543304443,
+      "learning_rate": 1.4954981695854359e-05,
+      "loss": 2.573,
+      "step": 7084
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.6682156324386597,
+      "learning_rate": 1.4935193430295835e-05,
+      "loss": 2.5718,
+      "step": 7088
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.489130973815918,
+      "learning_rate": 1.4915405164737312e-05,
+      "loss": 2.7728,
+      "step": 7092
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.72616708278656,
+      "learning_rate": 1.4895616899178788e-05,
+      "loss": 2.6457,
+      "step": 7096
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.5968120098114014,
+      "learning_rate": 1.4875828633620264e-05,
+      "loss": 2.4886,
+      "step": 7100
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.7065930366516113,
+      "learning_rate": 1.485604036806174e-05,
+      "loss": 2.6894,
+      "step": 7104
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.8437420129776,
+      "learning_rate": 1.4836252102503217e-05,
+      "loss": 2.6103,
+      "step": 7108
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.5512233972549438,
+      "learning_rate": 1.4816463836944692e-05,
+      "loss": 2.4657,
+      "step": 7112
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.6278142929077148,
+      "learning_rate": 1.4796675571386168e-05,
+      "loss": 2.7127,
+      "step": 7116
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.5600184202194214,
+      "learning_rate": 1.4776887305827644e-05,
+      "loss": 2.4707,
+      "step": 7120
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.5605080127716064,
+      "learning_rate": 1.475709904026912e-05,
+      "loss": 2.5009,
+      "step": 7124
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6376603841781616,
+      "learning_rate": 1.4737310774710597e-05,
+      "loss": 2.4116,
+      "step": 7128
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.5430164337158203,
+      "learning_rate": 1.4717522509152073e-05,
+      "loss": 2.4315,
+      "step": 7132
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.9249303340911865,
+      "learning_rate": 1.469773424359355e-05,
+      "loss": 2.5128,
+      "step": 7136
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.914553165435791,
+      "learning_rate": 1.4677945978035026e-05,
+      "loss": 2.5726,
+      "step": 7140
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6890403032302856,
+      "learning_rate": 1.4658157712476502e-05,
+      "loss": 2.5233,
+      "step": 7144
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6215218305587769,
+      "learning_rate": 1.463836944691798e-05,
+      "loss": 2.4541,
+      "step": 7148
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6023436784744263,
+      "learning_rate": 1.4618581181359457e-05,
+      "loss": 2.5825,
+      "step": 7152
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6800665855407715,
+      "learning_rate": 1.459879291580093e-05,
+      "loss": 2.5275,
+      "step": 7156
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.5266000032424927,
+      "learning_rate": 1.4579004650242406e-05,
+      "loss": 2.6414,
+      "step": 7160
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.660843014717102,
+      "learning_rate": 1.4559216384683882e-05,
+      "loss": 2.4934,
+      "step": 7164
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.5643970966339111,
+      "learning_rate": 1.4539428119125358e-05,
+      "loss": 2.4847,
+      "step": 7168
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.4681652784347534,
+      "learning_rate": 1.4519639853566835e-05,
+      "loss": 2.423,
+      "step": 7172
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.5900237560272217,
+      "learning_rate": 1.4499851588008311e-05,
+      "loss": 2.5645,
+      "step": 7176
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6541297435760498,
+      "learning_rate": 1.4480063322449789e-05,
+      "loss": 2.5434,
+      "step": 7180
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.5105384588241577,
+      "learning_rate": 1.4460275056891265e-05,
+      "loss": 2.6391,
+      "step": 7184
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6248685121536255,
+      "learning_rate": 1.4440486791332742e-05,
+      "loss": 2.6772,
+      "step": 7188
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.559591293334961,
+      "learning_rate": 1.4420698525774218e-05,
+      "loss": 2.5175,
+      "step": 7192
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.73567533493042,
+      "learning_rate": 1.4400910260215691e-05,
+      "loss": 2.5799,
+      "step": 7196
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.602149248123169,
+      "learning_rate": 1.4381121994657167e-05,
+      "loss": 2.5475,
+      "step": 7200
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.6269313097000122,
+      "learning_rate": 1.4361333729098644e-05,
+      "loss": 2.5729,
+      "step": 7204
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.5919511318206787,
+      "learning_rate": 1.4341545463540122e-05,
+      "loss": 2.4055,
+      "step": 7208
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.9625754356384277,
+      "learning_rate": 1.4321757197981598e-05,
+      "loss": 2.6867,
+      "step": 7212
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.7777873277664185,
+      "learning_rate": 1.4301968932423074e-05,
+      "loss": 2.7518,
+      "step": 7216
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.632881760597229,
+      "learning_rate": 1.428218066686455e-05,
+      "loss": 2.3954,
+      "step": 7220
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 1.7910826206207275,
+      "learning_rate": 1.4262392401306027e-05,
+      "loss": 2.5477,
+      "step": 7224
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6172887086868286,
+      "learning_rate": 1.4242604135747503e-05,
+      "loss": 2.6665,
+      "step": 7228
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.7775293588638306,
+      "learning_rate": 1.422281587018898e-05,
+      "loss": 2.5237,
+      "step": 7232
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6558274030685425,
+      "learning_rate": 1.4203027604630456e-05,
+      "loss": 2.5909,
+      "step": 7236
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6271092891693115,
+      "learning_rate": 1.418323933907193e-05,
+      "loss": 2.5299,
+      "step": 7240
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5768835544586182,
+      "learning_rate": 1.4163451073513407e-05,
+      "loss": 2.5376,
+      "step": 7244
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.641741156578064,
+      "learning_rate": 1.4143662807954883e-05,
+      "loss": 2.5802,
+      "step": 7248
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5866329669952393,
+      "learning_rate": 1.412387454239636e-05,
+      "loss": 2.4445,
+      "step": 7252
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6858747005462646,
+      "learning_rate": 1.4104086276837836e-05,
+      "loss": 2.51,
+      "step": 7256
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6350436210632324,
+      "learning_rate": 1.4084298011279312e-05,
+      "loss": 2.6297,
+      "step": 7260
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.7628724575042725,
+      "learning_rate": 1.4064509745720788e-05,
+      "loss": 2.6294,
+      "step": 7264
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6731958389282227,
+      "learning_rate": 1.4044721480162265e-05,
+      "loss": 2.5577,
+      "step": 7268
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.7393099069595337,
+      "learning_rate": 1.4024933214603741e-05,
+      "loss": 2.6079,
+      "step": 7272
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.7639967203140259,
+      "learning_rate": 1.4005144949045217e-05,
+      "loss": 2.7389,
+      "step": 7276
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5961958169937134,
+      "learning_rate": 1.3985356683486692e-05,
+      "loss": 2.5846,
+      "step": 7280
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6526871919631958,
+      "learning_rate": 1.3965568417928168e-05,
+      "loss": 2.5965,
+      "step": 7284
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.7771737575531006,
+      "learning_rate": 1.3945780152369645e-05,
+      "loss": 2.5905,
+      "step": 7288
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.713395118713379,
+      "learning_rate": 1.3925991886811121e-05,
+      "loss": 2.5063,
+      "step": 7292
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5294148921966553,
+      "learning_rate": 1.3906203621252597e-05,
+      "loss": 2.4081,
+      "step": 7296
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6980494260787964,
+      "learning_rate": 1.3886415355694074e-05,
+      "loss": 2.4693,
+      "step": 7300
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.703873634338379,
+      "learning_rate": 1.386662709013555e-05,
+      "loss": 2.4529,
+      "step": 7304
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5093457698822021,
+      "learning_rate": 1.3846838824577026e-05,
+      "loss": 2.439,
+      "step": 7308
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.6407102346420288,
+      "learning_rate": 1.3827050559018503e-05,
+      "loss": 2.4581,
+      "step": 7312
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.485217809677124,
+      "learning_rate": 1.380726229345998e-05,
+      "loss": 2.5162,
+      "step": 7316
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5687867403030396,
+      "learning_rate": 1.3787474027901457e-05,
+      "loss": 2.4868,
+      "step": 7320
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5972037315368652,
+      "learning_rate": 1.376768576234293e-05,
+      "loss": 2.5713,
+      "step": 7324
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6357609033584595,
+      "learning_rate": 1.3747897496784406e-05,
+      "loss": 2.5051,
+      "step": 7328
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.7274508476257324,
+      "learning_rate": 1.3728109231225883e-05,
+      "loss": 2.596,
+      "step": 7332
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.9209715127944946,
+      "learning_rate": 1.3708320965667359e-05,
+      "loss": 2.7628,
+      "step": 7336
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.5816470384597778,
+      "learning_rate": 1.3688532700108835e-05,
+      "loss": 2.4371,
+      "step": 7340
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6190577745437622,
+      "learning_rate": 1.3668744434550313e-05,
+      "loss": 2.4256,
+      "step": 7344
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.627508282661438,
+      "learning_rate": 1.364895616899179e-05,
+      "loss": 2.6653,
+      "step": 7348
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6422009468078613,
+      "learning_rate": 1.3629167903433266e-05,
+      "loss": 2.6699,
+      "step": 7352
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.546067476272583,
+      "learning_rate": 1.3609379637874742e-05,
+      "loss": 2.4263,
+      "step": 7356
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.621297836303711,
+      "learning_rate": 1.3589591372316218e-05,
+      "loss": 2.4296,
+      "step": 7360
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.643576741218567,
+      "learning_rate": 1.3569803106757691e-05,
+      "loss": 2.5806,
+      "step": 7364
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6136977672576904,
+      "learning_rate": 1.3550014841199168e-05,
+      "loss": 2.4887,
+      "step": 7368
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.704002857208252,
+      "learning_rate": 1.3530226575640644e-05,
+      "loss": 2.583,
+      "step": 7372
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6694648265838623,
+      "learning_rate": 1.3510438310082122e-05,
+      "loss": 2.4127,
+      "step": 7376
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6585307121276855,
+      "learning_rate": 1.3490650044523598e-05,
+      "loss": 2.4128,
+      "step": 7380
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.7365046739578247,
+      "learning_rate": 1.3470861778965075e-05,
+      "loss": 2.6269,
+      "step": 7384
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6612383127212524,
+      "learning_rate": 1.3451073513406551e-05,
+      "loss": 2.5896,
+      "step": 7388
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.7151292562484741,
+      "learning_rate": 1.3431285247848027e-05,
+      "loss": 2.4705,
+      "step": 7392
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.608148455619812,
+      "learning_rate": 1.3411496982289504e-05,
+      "loss": 2.4956,
+      "step": 7396
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.9409472942352295,
+      "learning_rate": 1.339170871673098e-05,
+      "loss": 2.5163,
+      "step": 7400
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6790823936462402,
+      "learning_rate": 1.3371920451172456e-05,
+      "loss": 2.5319,
+      "step": 7404
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.833620548248291,
+      "learning_rate": 1.3352132185613931e-05,
+      "loss": 2.5467,
+      "step": 7408
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.7631961107254028,
+      "learning_rate": 1.3332343920055407e-05,
+      "loss": 2.5748,
+      "step": 7412
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.5768530368804932,
+      "learning_rate": 1.3312555654496884e-05,
+      "loss": 2.4908,
+      "step": 7416
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6658759117126465,
+      "learning_rate": 1.329276738893836e-05,
+      "loss": 2.5698,
+      "step": 7420
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.7477085590362549,
+      "learning_rate": 1.3272979123379836e-05,
+      "loss": 2.3864,
+      "step": 7424
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 1.6787872314453125,
+      "learning_rate": 1.3253190857821313e-05,
+      "loss": 2.5065,
+      "step": 7428
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.6128181219100952,
+      "learning_rate": 1.3233402592262789e-05,
+      "loss": 2.6635,
+      "step": 7432
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.4591010808944702,
+      "learning_rate": 1.3213614326704265e-05,
+      "loss": 2.4067,
+      "step": 7436
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.7754063606262207,
+      "learning_rate": 1.3193826061145742e-05,
+      "loss": 2.5308,
+      "step": 7440
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.708154559135437,
+      "learning_rate": 1.3174037795587218e-05,
+      "loss": 2.4604,
+      "step": 7444
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.539201021194458,
+      "learning_rate": 1.3154249530028694e-05,
+      "loss": 2.5372,
+      "step": 7448
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.8630115985870361,
+      "learning_rate": 1.3134461264470169e-05,
+      "loss": 2.5022,
+      "step": 7452
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.7771594524383545,
+      "learning_rate": 1.3114672998911645e-05,
+      "loss": 2.3826,
+      "step": 7456
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.8175123929977417,
+      "learning_rate": 1.3094884733353121e-05,
+      "loss": 2.5922,
+      "step": 7460
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.4767266511917114,
+      "learning_rate": 1.3075096467794598e-05,
+      "loss": 2.5334,
+      "step": 7464
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.7710050344467163,
+      "learning_rate": 1.3055308202236074e-05,
+      "loss": 2.5901,
+      "step": 7468
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.78278648853302,
+      "learning_rate": 1.303551993667755e-05,
+      "loss": 2.5621,
+      "step": 7472
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.553341269493103,
+      "learning_rate": 1.3015731671119027e-05,
+      "loss": 2.5296,
+      "step": 7476
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.4547873735427856,
+      "learning_rate": 1.2995943405560505e-05,
+      "loss": 2.5812,
+      "step": 7480
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.6034144163131714,
+      "learning_rate": 1.2976155140001981e-05,
+      "loss": 2.3507,
+      "step": 7484
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.6461652517318726,
+      "learning_rate": 1.2956366874443457e-05,
+      "loss": 2.6291,
+      "step": 7488
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.6804616451263428,
+      "learning_rate": 1.293657860888493e-05,
+      "loss": 2.4514,
+      "step": 7492
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.5887171030044556,
+      "learning_rate": 1.2916790343326407e-05,
+      "loss": 2.386,
+      "step": 7496
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.6213395595550537,
+      "learning_rate": 1.2897002077767883e-05,
+      "loss": 2.5569,
+      "step": 7500
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.7485177516937256,
+      "learning_rate": 1.287721381220936e-05,
+      "loss": 2.5454,
+      "step": 7504
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.660869836807251,
+      "learning_rate": 1.2857425546650836e-05,
+      "loss": 2.5177,
+      "step": 7508
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.674647331237793,
+      "learning_rate": 1.2837637281092314e-05,
+      "loss": 2.3855,
+      "step": 7512
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 2.053084373474121,
+      "learning_rate": 1.281784901553379e-05,
+      "loss": 2.7179,
+      "step": 7516
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.5778189897537231,
+      "learning_rate": 1.2798060749975266e-05,
+      "loss": 2.5341,
+      "step": 7520
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.670196294784546,
+      "learning_rate": 1.2778272484416743e-05,
+      "loss": 2.4186,
+      "step": 7524
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 1.7191470861434937,
+      "learning_rate": 1.2758484218858219e-05,
+      "loss": 2.4801,
+      "step": 7528
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6857608556747437,
+      "learning_rate": 1.2738695953299695e-05,
+      "loss": 2.4867,
+      "step": 7532
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6951642036437988,
+      "learning_rate": 1.2718907687741168e-05,
+      "loss": 2.5551,
+      "step": 7536
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.9836032390594482,
+      "learning_rate": 1.2699119422182646e-05,
+      "loss": 2.4202,
+      "step": 7540
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6894361972808838,
+      "learning_rate": 1.2679331156624122e-05,
+      "loss": 2.5553,
+      "step": 7544
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6373331546783447,
+      "learning_rate": 1.2659542891065599e-05,
+      "loss": 2.3549,
+      "step": 7548
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.5669854879379272,
+      "learning_rate": 1.2639754625507075e-05,
+      "loss": 2.4814,
+      "step": 7552
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.697510838508606,
+      "learning_rate": 1.2619966359948551e-05,
+      "loss": 2.5292,
+      "step": 7556
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6051623821258545,
+      "learning_rate": 1.2600178094390028e-05,
+      "loss": 2.4635,
+      "step": 7560
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.550139307975769,
+      "learning_rate": 1.2580389828831504e-05,
+      "loss": 2.5052,
+      "step": 7564
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.8294285535812378,
+      "learning_rate": 1.256060156327298e-05,
+      "loss": 2.6251,
+      "step": 7568
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.731762170791626,
+      "learning_rate": 1.2540813297714457e-05,
+      "loss": 2.6812,
+      "step": 7572
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.7276403903961182,
+      "learning_rate": 1.2521025032155931e-05,
+      "loss": 2.5587,
+      "step": 7576
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.8602039813995361,
+      "learning_rate": 1.2501236766597408e-05,
+      "loss": 2.5649,
+      "step": 7580
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.5579040050506592,
+      "learning_rate": 1.2481448501038886e-05,
+      "loss": 2.6513,
+      "step": 7584
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.7131482362747192,
+      "learning_rate": 1.246166023548036e-05,
+      "loss": 2.4914,
+      "step": 7588
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.68119215965271,
+      "learning_rate": 1.2441871969921837e-05,
+      "loss": 2.5319,
+      "step": 7592
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.8896154165267944,
+      "learning_rate": 1.2422083704363313e-05,
+      "loss": 2.5683,
+      "step": 7596
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.5681391954421997,
+      "learning_rate": 1.240229543880479e-05,
+      "loss": 2.4047,
+      "step": 7600
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.4728906154632568,
+      "learning_rate": 1.2382507173246266e-05,
+      "loss": 2.5071,
+      "step": 7604
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.5875272750854492,
+      "learning_rate": 1.2362718907687742e-05,
+      "loss": 2.5415,
+      "step": 7608
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6239516735076904,
+      "learning_rate": 1.2342930642129218e-05,
+      "loss": 2.4362,
+      "step": 7612
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.812610149383545,
+      "learning_rate": 1.2323142376570695e-05,
+      "loss": 2.5034,
+      "step": 7616
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6190868616104126,
+      "learning_rate": 1.2303354111012171e-05,
+      "loss": 2.4786,
+      "step": 7620
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.6609690189361572,
+      "learning_rate": 1.2283565845453647e-05,
+      "loss": 2.372,
+      "step": 7624
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 1.700492024421692,
+      "learning_rate": 1.2263777579895122e-05,
+      "loss": 2.4527,
+      "step": 7628
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.8237019777297974,
+      "learning_rate": 1.2243989314336598e-05,
+      "loss": 2.548,
+      "step": 7632
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.558048129081726,
+      "learning_rate": 1.2224201048778074e-05,
+      "loss": 2.5439,
+      "step": 7636
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.7001163959503174,
+      "learning_rate": 1.220441278321955e-05,
+      "loss": 2.5146,
+      "step": 7640
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.5829248428344727,
+      "learning_rate": 1.2184624517661027e-05,
+      "loss": 2.5473,
+      "step": 7644
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.5922554731369019,
+      "learning_rate": 1.2164836252102505e-05,
+      "loss": 2.5482,
+      "step": 7648
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.5552897453308105,
+      "learning_rate": 1.214504798654398e-05,
+      "loss": 2.7146,
+      "step": 7652
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.6641972064971924,
+      "learning_rate": 1.2125259720985456e-05,
+      "loss": 2.5317,
+      "step": 7656
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.727538824081421,
+      "learning_rate": 1.2105471455426932e-05,
+      "loss": 2.511,
+      "step": 7660
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.7466334104537964,
+      "learning_rate": 1.2085683189868409e-05,
+      "loss": 2.6592,
+      "step": 7664
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.578805923461914,
+      "learning_rate": 1.2065894924309885e-05,
+      "loss": 2.5405,
+      "step": 7668
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.7704498767852783,
+      "learning_rate": 1.204610665875136e-05,
+      "loss": 2.7104,
+      "step": 7672
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.901253342628479,
+      "learning_rate": 1.2026318393192838e-05,
+      "loss": 2.68,
+      "step": 7676
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.6251075267791748,
+      "learning_rate": 1.2006530127634314e-05,
+      "loss": 2.5327,
+      "step": 7680
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.9433711767196655,
+      "learning_rate": 1.198674186207579e-05,
+      "loss": 2.664,
+      "step": 7684
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.6232560873031616,
+      "learning_rate": 1.1966953596517267e-05,
+      "loss": 2.3881,
+      "step": 7688
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.625562310218811,
+      "learning_rate": 1.1947165330958741e-05,
+      "loss": 2.3981,
+      "step": 7692
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.8203229904174805,
+      "learning_rate": 1.1927377065400218e-05,
+      "loss": 2.5587,
+      "step": 7696
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.5125036239624023,
+      "learning_rate": 1.1907588799841694e-05,
+      "loss": 2.548,
+      "step": 7700
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.7927870750427246,
+      "learning_rate": 1.188780053428317e-05,
+      "loss": 2.5317,
+      "step": 7704
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.853130578994751,
+      "learning_rate": 1.1868012268724647e-05,
+      "loss": 2.5032,
+      "step": 7708
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.8347195386886597,
+      "learning_rate": 1.1848224003166123e-05,
+      "loss": 2.3406,
+      "step": 7712
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.6292403936386108,
+      "learning_rate": 1.18284357376076e-05,
+      "loss": 2.4874,
+      "step": 7716
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.6553095579147339,
+      "learning_rate": 1.1808647472049076e-05,
+      "loss": 2.5065,
+      "step": 7720
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.9831068515777588,
+      "learning_rate": 1.1788859206490552e-05,
+      "loss": 2.5079,
+      "step": 7724
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.7396299839019775,
+      "learning_rate": 1.1769070940932028e-05,
+      "loss": 2.5171,
+      "step": 7728
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.8842509984970093,
+      "learning_rate": 1.1749282675373505e-05,
+      "loss": 2.6561,
+      "step": 7732
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.5185445547103882,
+      "learning_rate": 1.1729494409814979e-05,
+      "loss": 2.6638,
+      "step": 7736
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.7892464399337769,
+      "learning_rate": 1.1709706144256455e-05,
+      "loss": 2.4509,
+      "step": 7740
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.6770362854003906,
+      "learning_rate": 1.1689917878697933e-05,
+      "loss": 2.536,
+      "step": 7744
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.6587915420532227,
+      "learning_rate": 1.167012961313941e-05,
+      "loss": 2.515,
+      "step": 7748
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.7976752519607544,
+      "learning_rate": 1.1650341347580886e-05,
+      "loss": 2.4524,
+      "step": 7752
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.7145442962646484,
+      "learning_rate": 1.163055308202236e-05,
+      "loss": 2.56,
+      "step": 7756
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.6737264394760132,
+      "learning_rate": 1.1610764816463837e-05,
+      "loss": 2.45,
+      "step": 7760
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.683347225189209,
+      "learning_rate": 1.1590976550905313e-05,
+      "loss": 2.4508,
+      "step": 7764
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.6320801973342896,
+      "learning_rate": 1.157118828534679e-05,
+      "loss": 2.6438,
+      "step": 7768
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.8501825332641602,
+      "learning_rate": 1.1551400019788266e-05,
+      "loss": 2.4262,
+      "step": 7772
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.8582115173339844,
+      "learning_rate": 1.1531611754229742e-05,
+      "loss": 2.4314,
+      "step": 7776
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.5969737768173218,
+      "learning_rate": 1.1511823488671219e-05,
+      "loss": 2.4917,
+      "step": 7780
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.5447652339935303,
+      "learning_rate": 1.1492035223112695e-05,
+      "loss": 2.5134,
+      "step": 7784
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.7579408884048462,
+      "learning_rate": 1.1472246957554171e-05,
+      "loss": 2.4526,
+      "step": 7788
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.6958955526351929,
+      "learning_rate": 1.1452458691995648e-05,
+      "loss": 2.5719,
+      "step": 7792
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.6478573083877563,
+      "learning_rate": 1.1432670426437124e-05,
+      "loss": 2.5227,
+      "step": 7796
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.5438200235366821,
+      "learning_rate": 1.1412882160878599e-05,
+      "loss": 2.3835,
+      "step": 7800
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.5718754529953003,
+      "learning_rate": 1.1393093895320075e-05,
+      "loss": 2.4568,
+      "step": 7804
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.6531394720077515,
+      "learning_rate": 1.1373305629761551e-05,
+      "loss": 2.3707,
+      "step": 7808
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.8568472862243652,
+      "learning_rate": 1.135351736420303e-05,
+      "loss": 2.583,
+      "step": 7812
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.7812650203704834,
+      "learning_rate": 1.1333729098644506e-05,
+      "loss": 2.5865,
+      "step": 7816
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.7314536571502686,
+      "learning_rate": 1.131394083308598e-05,
+      "loss": 2.5384,
+      "step": 7820
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.8775840997695923,
+      "learning_rate": 1.1294152567527457e-05,
+      "loss": 2.6276,
+      "step": 7824
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.3948659896850586,
+      "learning_rate": 1.1274364301968933e-05,
+      "loss": 2.5302,
+      "step": 7828
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.690691590309143,
+      "learning_rate": 1.125457603641041e-05,
+      "loss": 2.5569,
+      "step": 7832
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.606754183769226,
+      "learning_rate": 1.1234787770851885e-05,
+      "loss": 2.3866,
+      "step": 7836
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5848886966705322,
+      "learning_rate": 1.1214999505293362e-05,
+      "loss": 2.5407,
+      "step": 7840
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5651729106903076,
+      "learning_rate": 1.1195211239734838e-05,
+      "loss": 2.2891,
+      "step": 7844
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.704177975654602,
+      "learning_rate": 1.1175422974176314e-05,
+      "loss": 2.6963,
+      "step": 7848
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.605334997177124,
+      "learning_rate": 1.115563470861779e-05,
+      "loss": 2.5576,
+      "step": 7852
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.530823826789856,
+      "learning_rate": 1.1135846443059267e-05,
+      "loss": 2.4859,
+      "step": 7856
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.6835219860076904,
+      "learning_rate": 1.1116058177500742e-05,
+      "loss": 2.462,
+      "step": 7860
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.6420457363128662,
+      "learning_rate": 1.1096269911942218e-05,
+      "loss": 2.6175,
+      "step": 7864
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.6330996751785278,
+      "learning_rate": 1.1076481646383694e-05,
+      "loss": 2.6308,
+      "step": 7868
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 2.031244993209839,
+      "learning_rate": 1.105669338082517e-05,
+      "loss": 2.4599,
+      "step": 7872
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5154794454574585,
+      "learning_rate": 1.1036905115266647e-05,
+      "loss": 2.4949,
+      "step": 7876
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.7397664785385132,
+      "learning_rate": 1.1017116849708125e-05,
+      "loss": 2.3962,
+      "step": 7880
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.7765430212020874,
+      "learning_rate": 1.09973285841496e-05,
+      "loss": 2.4214,
+      "step": 7884
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.6177083253860474,
+      "learning_rate": 1.0977540318591076e-05,
+      "loss": 2.5221,
+      "step": 7888
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.6060839891433716,
+      "learning_rate": 1.0957752053032552e-05,
+      "loss": 2.6571,
+      "step": 7892
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.6707481145858765,
+      "learning_rate": 1.0937963787474029e-05,
+      "loss": 2.373,
+      "step": 7896
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.8542253971099854,
+      "learning_rate": 1.0918175521915505e-05,
+      "loss": 2.4029,
+      "step": 7900
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.6408201456069946,
+      "learning_rate": 1.089838725635698e-05,
+      "loss": 2.4065,
+      "step": 7904
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5829615592956543,
+      "learning_rate": 1.0878598990798458e-05,
+      "loss": 2.6859,
+      "step": 7908
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5252026319503784,
+      "learning_rate": 1.0858810725239934e-05,
+      "loss": 2.5326,
+      "step": 7912
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5747402906417847,
+      "learning_rate": 1.083902245968141e-05,
+      "loss": 2.5311,
+      "step": 7916
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.592838168144226,
+      "learning_rate": 1.0819234194122887e-05,
+      "loss": 2.3582,
+      "step": 7920
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5652263164520264,
+      "learning_rate": 1.0799445928564361e-05,
+      "loss": 2.5613,
+      "step": 7924
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.5245355367660522,
+      "learning_rate": 1.0779657663005837e-05,
+      "loss": 2.7002,
+      "step": 7928
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 1.724204421043396,
+      "learning_rate": 1.0759869397447314e-05,
+      "loss": 2.9039,
+      "step": 7932
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.7106647491455078,
+      "learning_rate": 1.074008113188879e-05,
+      "loss": 2.5086,
+      "step": 7936
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.5160934925079346,
+      "learning_rate": 1.0720292866330266e-05,
+      "loss": 2.447,
+      "step": 7940
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.833022952079773,
+      "learning_rate": 1.0700504600771743e-05,
+      "loss": 2.4753,
+      "step": 7944
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.7385578155517578,
+      "learning_rate": 1.0680716335213219e-05,
+      "loss": 2.6183,
+      "step": 7948
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.8667320013046265,
+      "learning_rate": 1.0660928069654695e-05,
+      "loss": 2.5605,
+      "step": 7952
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.5835785865783691,
+      "learning_rate": 1.0641139804096172e-05,
+      "loss": 2.4019,
+      "step": 7956
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.7108229398727417,
+      "learning_rate": 1.0621351538537648e-05,
+      "loss": 2.708,
+      "step": 7960
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.674804925918579,
+      "learning_rate": 1.0601563272979124e-05,
+      "loss": 2.5102,
+      "step": 7964
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.955705165863037,
+      "learning_rate": 1.0581775007420599e-05,
+      "loss": 2.5221,
+      "step": 7968
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.740157961845398,
+      "learning_rate": 1.0561986741862075e-05,
+      "loss": 2.5232,
+      "step": 7972
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.6809014081954956,
+      "learning_rate": 1.0542198476303553e-05,
+      "loss": 2.3372,
+      "step": 7976
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.7483688592910767,
+      "learning_rate": 1.052241021074503e-05,
+      "loss": 2.4913,
+      "step": 7980
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.5279000997543335,
+      "learning_rate": 1.0502621945186506e-05,
+      "loss": 2.6386,
+      "step": 7984
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.7373197078704834,
+      "learning_rate": 1.048283367962798e-05,
+      "loss": 2.5823,
+      "step": 7988
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.6561895608901978,
+      "learning_rate": 1.0463045414069457e-05,
+      "loss": 2.4531,
+      "step": 7992
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.6324809789657593,
+      "learning_rate": 1.0443257148510933e-05,
+      "loss": 2.4943,
+      "step": 7996
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.4932702779769897,
+      "learning_rate": 1.042346888295241e-05,
+      "loss": 2.5745,
+      "step": 8000
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.6975860595703125,
+      "learning_rate": 1.0403680617393886e-05,
+      "loss": 2.5195,
+      "step": 8004
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.5818710327148438,
+      "learning_rate": 1.0383892351835362e-05,
+      "loss": 2.5146,
+      "step": 8008
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.6648439168930054,
+      "learning_rate": 1.0364104086276839e-05,
+      "loss": 2.5038,
+      "step": 8012
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.8664870262145996,
+      "learning_rate": 1.0344315820718315e-05,
+      "loss": 2.4395,
+      "step": 8016
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.6299059391021729,
+      "learning_rate": 1.0324527555159791e-05,
+      "loss": 2.632,
+      "step": 8020
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.6017898321151733,
+      "learning_rate": 1.0304739289601268e-05,
+      "loss": 2.523,
+      "step": 8024
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.8422410488128662,
+      "learning_rate": 1.0284951024042744e-05,
+      "loss": 2.5135,
+      "step": 8028
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 1.7916709184646606,
+      "learning_rate": 1.0265162758484218e-05,
+      "loss": 2.3953,
+      "step": 8032
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6924244165420532,
+      "learning_rate": 1.0245374492925695e-05,
+      "loss": 2.5083,
+      "step": 8036
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6407674551010132,
+      "learning_rate": 1.0225586227367171e-05,
+      "loss": 2.5489,
+      "step": 8040
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.751863718032837,
+      "learning_rate": 1.0205797961808649e-05,
+      "loss": 2.5538,
+      "step": 8044
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.8018790483474731,
+      "learning_rate": 1.0186009696250125e-05,
+      "loss": 2.4931,
+      "step": 8048
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6749464273452759,
+      "learning_rate": 1.01662214306916e-05,
+      "loss": 2.5116,
+      "step": 8052
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.8620166778564453,
+      "learning_rate": 1.0146433165133076e-05,
+      "loss": 2.4857,
+      "step": 8056
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.8505146503448486,
+      "learning_rate": 1.0126644899574553e-05,
+      "loss": 2.535,
+      "step": 8060
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.4946340322494507,
+      "learning_rate": 1.0106856634016029e-05,
+      "loss": 2.3432,
+      "step": 8064
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.8005412817001343,
+      "learning_rate": 1.0087068368457505e-05,
+      "loss": 2.4651,
+      "step": 8068
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.786773443222046,
+      "learning_rate": 1.006728010289898e-05,
+      "loss": 2.4242,
+      "step": 8072
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.7355870008468628,
+      "learning_rate": 1.0047491837340458e-05,
+      "loss": 2.4785,
+      "step": 8076
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.677388310432434,
+      "learning_rate": 1.0027703571781934e-05,
+      "loss": 2.5572,
+      "step": 8080
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.7865822315216064,
+      "learning_rate": 1.000791530622341e-05,
+      "loss": 2.771,
+      "step": 8084
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.8583182096481323,
+      "learning_rate": 9.988127040664887e-06,
+      "loss": 2.4894,
+      "step": 8088
     }
   ],
   "logging_steps": 4,
@@ -12397,7 +14168,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 1011,
-  "total_flos":
+  "total_flos": 9.891653776677274e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
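The trainer_state.json update records the run's progress: global_step 8088 at epoch 0.8002374591867023, which implies roughly 8088 / 0.8002 ≈ 10,108 optimizer steps per epoch, and the appended log_history entries cover steps 7080 through 8088 at the configured logging_steps of 4. A minimal sketch for summarizing what this commit added (assuming the checkpoint directory layout shown above; the step range is taken from the diff):

```python
import json

# Load the committed trainer state and pull out the entries added here.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

new_logs = [e for e in state["log_history"] if 7080 <= e["step"] <= 8088]
losses = [e["loss"] for e in new_logs]

print(f"global_step={state['global_step']}, epoch={state['epoch']:.4f}")
print(f"{len(new_logs)} new log entries, mean loss {sum(losses)/len(losses):.4f}")
print(f"final learning_rate={new_logs[-1]['learning_rate']:.3e}")
```

Because save_steps is 1011 and 8088 = 8 × 1011, this is the eighth periodic save of the run; passing the checkpoint directory to `Trainer.train(resume_from_checkpoint=...)` would continue from exactly this state.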