jonathanjordan21 committed • Commit 7cffce7 • Parent(s): c44e918

Upload folder using huggingface_hub

Files changed:
- data/model.safetensors +1 -1
- data/optimizer.pt +1 -1
- data/rng_state.pth +1 -1
- data/scheduler.pt +1 -1
- data/trainer_state.json +2334 -3
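The commit title is the default message emitted by huggingface_hub's upload_folder helper. As a point of reference, a minimal sketch of the kind of call that produces a commit like this one; the repo id, local path, and path_in_repo are illustrative assumptions, not taken from the commit:

from huggingface_hub import HfApi

api = HfApi()  # uses the token stored by `huggingface-cli login`
api.upload_folder(
    folder_path="output/data",              # hypothetical local checkpoint folder
    path_in_repo="data",                    # assumed, since every changed file sits under data/
    repo_id="jonathanjordan21/some-model",  # hypothetical target repository
    commit_message="Upload folder using huggingface_hub",
)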
data/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:02a118a503a23e518774fed3f4dec339d8da14145ceac0e9fffee2ae001ad0f2
 size 576008736
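This three-line stanza (and those of the .pt/.pth files below) is a Git LFS pointer: the repository tracks only the LFS spec version, the sha256 digest of the payload (oid), and its size in bytes, so the hunk records the 576,008,736-byte weights blob being replaced. A small sketch of verifying a downloaded blob against its pointer; the local path is an assumption:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB checkpoints don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

# oid taken from the new pointer above; the path is illustrative
expected = "02a118a503a23e518774fed3f4dec339d8da14145ceac0e9fffee2ae001ad0f2"
assert sha256_of("data/model.safetensors") == expected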
data/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:95a066061ad59333db8588202a134c688a5fa8bf89d8941dea7651093313aa50
 size 1152256984
data/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0aea246e6f4af30d76fc739911915f34c10cbfe67d00ea529a3053e734d22782
 size 14244
data/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:485d39d405243b01acdfe502b2e8c0eb6d4698b82fde169faed5a021088dcbc6
 size 1064
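optimizer.pt, scheduler.pt, and rng_state.pth are the auxiliary state the transformers Trainer writes next to the weights so a run can resume exactly where it stopped: optimizer moments, learning-rate-scheduler position, and RNG snapshots. That is why optimizer.pt is almost exactly twice the model's size (two Adam moments per parameter) while rng_state.pth is only ~14 KB. A hedged sketch of inspecting these files with plain torch; the paths are illustrative, and the key names are the usual ones rather than guaranteed:

import torch

# weights_only=False because these files carry full pickled state;
# only do this with checkpoints you trust.
optim_state = torch.load("data/optimizer.pt", map_location="cpu", weights_only=False)
sched_state = torch.load("data/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("data/rng_state.pth", map_location="cpu", weights_only=False)

print(list(optim_state))  # typically ["state", "param_groups"]
print(sched_state)        # lr-scheduler state_dict (last_epoch, base_lrs, ...)
print(list(rng_state))    # typically python / numpy / cpu (and cuda) RNG snapshots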
data/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.06815729925728017,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 35000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14007,6 +14007,2337 @@
       "learning_rate": 2.974807414080627e-05,
       "loss": 1.9742,
       "step": 30000
+    },
+    {
+      "epoch": 0.05844975249163613,
+      "grad_norm": 4.261213302612305,
+      "learning_rate": 2.9747822860414652e-05,
+      "loss": 1.747,
+      "step": 30015
+    },
+    {
+      "epoch": 0.05847896276274639,
+      "grad_norm": 2.775156259536743,
+      "learning_rate": 2.9747571455829544e-05,
+      "loss": 1.8235,
+      "step": 30030
+    },
+    {
+      "epoch": 0.05850817303385665,
+      "grad_norm": 4.271034240722656,
+      "learning_rate": 2.9747319927053066e-05,
+      "loss": 1.834,
+      "step": 30045
+    },
[… the hunk adds one such entry every 15 steps; the page capture shows them through step 33270 and then cuts off mid-entry …]
+    {
+      "epoch": 0.06478838132256318,
+      "grad_norm": 2.9308221340179443,
+      "learning_rate": 2.9690361167942042e-05,
+      "loss": 1.9136,
+      "step": 33270
+    },
     },
   ],
   "logging_steps": 15,
@@ -14026,7 +16357,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
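The bookkeeping fields give a sense of the run's scale: the header moves from step 30000 to "global_step": 35000 while "epoch" only reaches ≈0.0682, so one epoch is roughly 35000 / 0.06815729925728017 ≈ 513,500 optimizer steps, i.e. about 2.05 million samples at "train_batch_size": 4 (assuming no gradient accumulation). Likewise, the logged learning rate falls only from 2.9748e-05 at step 30000 to 2.9690e-05 by step 33270; if the schedule is linear, that slope (≈1.77e-11 per step) implies a decay horizon on the order of 1.7 million steps.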
+
"step": 33285
|
15543 |
+
},
|
15544 |
+
{
|
15545 |
+
"epoch": 0.06484680186478371,
|
15546 |
+
"grad_norm": 4.399453163146973,
|
15547 |
+
"learning_rate": 2.968980443880066e-05,
|
15548 |
+
"loss": 1.9369,
|
15549 |
+
"step": 33300
|
15550 |
+
},
|
15551 |
+
{
|
15552 |
+
"epoch": 0.06487601213589397,
|
15553 |
+
"grad_norm": 3.06842041015625,
|
15554 |
+
"learning_rate": 2.9689525888671436e-05,
|
15555 |
+
"loss": 1.9624,
|
15556 |
+
"step": 33315
|
15557 |
+
},
|
15558 |
+
{
|
15559 |
+
"epoch": 0.06490522240700423,
|
15560 |
+
"grad_norm": 1.9636744260787964,
|
15561 |
+
"learning_rate": 2.9689247214839647e-05,
|
15562 |
+
"loss": 1.7785,
|
15563 |
+
"step": 33330
|
15564 |
+
},
|
15565 |
+
{
|
15566 |
+
"epoch": 0.0649344326781145,
|
15567 |
+
"grad_norm": 3.882155418395996,
|
15568 |
+
"learning_rate": 2.968896841730764e-05,
|
15569 |
+
"loss": 1.8328,
|
15570 |
+
"step": 33345
|
15571 |
+
},
|
15572 |
+
{
|
15573 |
+
"epoch": 0.06496364294922476,
|
15574 |
+
"grad_norm": 3.773200511932373,
|
15575 |
+
"learning_rate": 2.9688689496077764e-05,
|
15576 |
+
"loss": 1.9322,
|
15577 |
+
"step": 33360
|
15578 |
+
},
|
15579 |
+
{
|
15580 |
+
"epoch": 0.06499285322033502,
|
15581 |
+
"grad_norm": 2.331101417541504,
|
15582 |
+
"learning_rate": 2.968841045115237e-05,
|
15583 |
+
"loss": 1.7726,
|
15584 |
+
"step": 33375
|
15585 |
+
},
|
15586 |
+
{
|
15587 |
+
"epoch": 0.06502206349144529,
|
15588 |
+
"grad_norm": 3.3869986534118652,
|
15589 |
+
"learning_rate": 2.9688131282533802e-05,
|
15590 |
+
"loss": 1.9489,
|
15591 |
+
"step": 33390
|
15592 |
+
},
|
15593 |
+
{
|
15594 |
+
"epoch": 0.06505127376255555,
|
15595 |
+
"grad_norm": 2.627638578414917,
|
15596 |
+
"learning_rate": 2.968785199022442e-05,
|
15597 |
+
"loss": 1.7626,
|
15598 |
+
"step": 33405
|
15599 |
+
},
|
15600 |
+
{
|
15601 |
+
"epoch": 0.06508048403366581,
|
15602 |
+
"grad_norm": 2.031043291091919,
|
15603 |
+
"learning_rate": 2.9687572574226566e-05,
|
15604 |
+
"loss": 1.8948,
|
15605 |
+
"step": 33420
|
15606 |
+
},
|
15607 |
+
{
|
15608 |
+
"epoch": 0.06510969430477608,
|
15609 |
+
"grad_norm": 1.6995463371276855,
|
15610 |
+
"learning_rate": 2.96872930345426e-05,
|
15611 |
+
"loss": 1.8642,
|
15612 |
+
"step": 33435
|
15613 |
+
},
|
15614 |
+
{
|
15615 |
+
"epoch": 0.06513890457588634,
|
15616 |
+
"grad_norm": 3.292022943496704,
|
15617 |
+
"learning_rate": 2.9687013371174873e-05,
|
15618 |
+
"loss": 1.8344,
|
15619 |
+
"step": 33450
|
15620 |
+
},
|
15621 |
+
{
|
15622 |
+
"epoch": 0.0651681148469966,
|
15623 |
+
"grad_norm": 2.9283857345581055,
|
15624 |
+
"learning_rate": 2.968673358412574e-05,
|
15625 |
+
"loss": 2.0218,
|
15626 |
+
"step": 33465
|
15627 |
+
},
|
15628 |
+
{
|
15629 |
+
"epoch": 0.06519732511810686,
|
15630 |
+
"grad_norm": 3.2172300815582275,
|
15631 |
+
"learning_rate": 2.968645367339756e-05,
|
15632 |
+
"loss": 1.8049,
|
15633 |
+
"step": 33480
|
15634 |
+
},
|
15635 |
+
{
|
15636 |
+
"epoch": 0.06522653538921712,
|
15637 |
+
"grad_norm": 2.1529486179351807,
|
15638 |
+
"learning_rate": 2.9686173638992687e-05,
|
15639 |
+
"loss": 1.8755,
|
15640 |
+
"step": 33495
|
15641 |
+
},
|
15642 |
+
{
|
15643 |
+
"epoch": 0.0652557456603274,
|
15644 |
+
"grad_norm": 2.5349905490875244,
|
15645 |
+
"learning_rate": 2.9685893480913477e-05,
|
15646 |
+
"loss": 2.0173,
|
15647 |
+
"step": 33510
|
15648 |
+
},
|
15649 |
+
{
|
15650 |
+
"epoch": 0.06528495593143765,
|
15651 |
+
"grad_norm": 3.646724224090576,
|
15652 |
+
"learning_rate": 2.9685613199162296e-05,
|
15653 |
+
"loss": 1.9269,
|
15654 |
+
"step": 33525
|
15655 |
+
},
|
15656 |
+
{
|
15657 |
+
"epoch": 0.06531416620254792,
|
15658 |
+
"grad_norm": 3.3442795276641846,
|
15659 |
+
"learning_rate": 2.96853327937415e-05,
|
15660 |
+
"loss": 1.7363,
|
15661 |
+
"step": 33540
|
15662 |
+
},
|
15663 |
+
{
|
15664 |
+
"epoch": 0.06534337647365818,
|
15665 |
+
"grad_norm": 3.301544189453125,
|
15666 |
+
"learning_rate": 2.9685052264653452e-05,
|
15667 |
+
"loss": 1.9225,
|
15668 |
+
"step": 33555
|
15669 |
+
},
|
15670 |
+
{
|
15671 |
+
"epoch": 0.06537258674476844,
|
15672 |
+
"grad_norm": 1.919858455657959,
|
15673 |
+
"learning_rate": 2.9684771611900512e-05,
|
15674 |
+
"loss": 1.7969,
|
15675 |
+
"step": 33570
|
15676 |
+
},
|
15677 |
+
{
|
15678 |
+
"epoch": 0.0654017970158787,
|
15679 |
+
"grad_norm": 3.6086807250976562,
|
15680 |
+
"learning_rate": 2.9684490835485045e-05,
|
15681 |
+
"loss": 1.8075,
|
15682 |
+
"step": 33585
|
15683 |
+
},
|
15684 |
+
{
|
15685 |
+
"epoch": 0.06543100728698896,
|
15686 |
+
"grad_norm": 1.659013032913208,
|
15687 |
+
"learning_rate": 2.9684209935409418e-05,
|
15688 |
+
"loss": 1.7631,
|
15689 |
+
"step": 33600
|
15690 |
+
},
|
15691 |
+
{
|
15692 |
+
"epoch": 0.06546021755809923,
|
15693 |
+
"grad_norm": 4.616360187530518,
|
15694 |
+
"learning_rate": 2.968392891167599e-05,
|
15695 |
+
"loss": 1.817,
|
15696 |
+
"step": 33615
|
15697 |
+
},
|
15698 |
+
{
|
15699 |
+
"epoch": 0.0654894278292095,
|
15700 |
+
"grad_norm": 2.684124708175659,
|
15701 |
+
"learning_rate": 2.9683647764287136e-05,
|
15702 |
+
"loss": 1.8655,
|
15703 |
+
"step": 33630
|
15704 |
+
},
|
15705 |
+
{
|
15706 |
+
"epoch": 0.06551863810031976,
|
15707 |
+
"grad_norm": 2.5763068199157715,
|
15708 |
+
"learning_rate": 2.9683366493245213e-05,
|
15709 |
+
"loss": 1.851,
|
15710 |
+
"step": 33645
|
15711 |
+
},
|
15712 |
+
{
|
15713 |
+
"epoch": 0.06554784837143002,
|
15714 |
+
"grad_norm": 4.1813859939575195,
|
15715 |
+
"learning_rate": 2.96830850985526e-05,
|
15716 |
+
"loss": 1.8571,
|
15717 |
+
"step": 33660
|
15718 |
+
},
|
15719 |
+
{
|
15720 |
+
"epoch": 0.06557705864254028,
|
15721 |
+
"grad_norm": 3.5396945476531982,
|
15722 |
+
"learning_rate": 2.968280358021166e-05,
|
15723 |
+
"loss": 1.8414,
|
15724 |
+
"step": 33675
|
15725 |
+
},
|
15726 |
+
{
|
15727 |
+
"epoch": 0.06560626891365054,
|
15728 |
+
"grad_norm": 3.497781276702881,
|
15729 |
+
"learning_rate": 2.968252193822477e-05,
|
15730 |
+
"loss": 2.0295,
|
15731 |
+
"step": 33690
|
15732 |
+
},
|
15733 |
+
{
|
15734 |
+
"epoch": 0.0656354791847608,
|
15735 |
+
"grad_norm": 2.3261985778808594,
|
15736 |
+
"learning_rate": 2.9682240172594294e-05,
|
15737 |
+
"loss": 1.8092,
|
15738 |
+
"step": 33705
|
15739 |
+
},
|
15740 |
+
{
|
15741 |
+
"epoch": 0.06566468945587108,
|
15742 |
+
"grad_norm": 2.558701753616333,
|
15743 |
+
"learning_rate": 2.968195828332261e-05,
|
15744 |
+
"loss": 1.9174,
|
15745 |
+
"step": 33720
|
15746 |
+
},
|
15747 |
+
{
|
15748 |
+
"epoch": 0.06569389972698134,
|
15749 |
+
"grad_norm": 3.210392713546753,
|
15750 |
+
"learning_rate": 2.9681676270412092e-05,
|
15751 |
+
"loss": 1.8506,
|
15752 |
+
"step": 33735
|
15753 |
+
},
|
15754 |
+
{
|
15755 |
+
"epoch": 0.0657231099980916,
|
15756 |
+
"grad_norm": 2.7672812938690186,
|
15757 |
+
"learning_rate": 2.968139413386511e-05,
|
15758 |
+
"loss": 1.9361,
|
15759 |
+
"step": 33750
|
15760 |
+
},
|
15761 |
+
{
|
15762 |
+
"epoch": 0.06575232026920186,
|
15763 |
+
"grad_norm": 4.212212562561035,
|
15764 |
+
"learning_rate": 2.9681111873684046e-05,
|
15765 |
+
"loss": 1.7308,
|
15766 |
+
"step": 33765
|
15767 |
+
},
|
15768 |
+
{
|
15769 |
+
"epoch": 0.06578153054031212,
|
15770 |
+
"grad_norm": 1.9889777898788452,
|
15771 |
+
"learning_rate": 2.9680829489871274e-05,
|
15772 |
+
"loss": 1.8549,
|
15773 |
+
"step": 33780
|
15774 |
+
},
|
15775 |
+
{
|
15776 |
+
"epoch": 0.06581074081142238,
|
15777 |
+
"grad_norm": 1.7036499977111816,
|
15778 |
+
"learning_rate": 2.9680546982429166e-05,
|
15779 |
+
"loss": 1.877,
|
15780 |
+
"step": 33795
|
15781 |
+
},
|
15782 |
+
{
|
15783 |
+
"epoch": 0.06583995108253264,
|
15784 |
+
"grad_norm": 2.7053897380828857,
|
15785 |
+
"learning_rate": 2.9680264351360115e-05,
|
15786 |
+
"loss": 1.8077,
|
15787 |
+
"step": 33810
|
15788 |
+
},
|
15789 |
+
{
|
15790 |
+
"epoch": 0.06586916135364292,
|
15791 |
+
"grad_norm": 2.8420169353485107,
|
15792 |
+
"learning_rate": 2.967998159666649e-05,
|
15793 |
+
"loss": 1.8568,
|
15794 |
+
"step": 33825
|
15795 |
+
},
|
15796 |
+
{
|
15797 |
+
"epoch": 0.06589837162475318,
|
15798 |
+
"grad_norm": 2.4401204586029053,
|
15799 |
+
"learning_rate": 2.9679698718350673e-05,
|
15800 |
+
"loss": 1.8119,
|
15801 |
+
"step": 33840
|
15802 |
+
},
|
15803 |
+
{
|
15804 |
+
"epoch": 0.06592758189586344,
|
15805 |
+
"grad_norm": 3.5529043674468994,
|
15806 |
+
"learning_rate": 2.9679415716415053e-05,
|
15807 |
+
"loss": 1.755,
|
15808 |
+
"step": 33855
|
15809 |
+
},
|
15810 |
+
{
|
15811 |
+
"epoch": 0.0659567921669737,
|
15812 |
+
"grad_norm": 2.672156572341919,
|
15813 |
+
"learning_rate": 2.9679132590862004e-05,
|
15814 |
+
"loss": 1.9222,
|
15815 |
+
"step": 33870
|
15816 |
+
},
|
15817 |
+
{
|
15818 |
+
"epoch": 0.06598600243808396,
|
15819 |
+
"grad_norm": 1.9536501169204712,
|
15820 |
+
"learning_rate": 2.967884934169392e-05,
|
15821 |
+
"loss": 1.9098,
|
15822 |
+
"step": 33885
|
15823 |
+
},
|
15824 |
+
{
|
15825 |
+
"epoch": 0.06601521270919422,
|
15826 |
+
"grad_norm": 2.567133903503418,
|
15827 |
+
"learning_rate": 2.9678565968913177e-05,
|
15828 |
+
"loss": 1.924,
|
15829 |
+
"step": 33900
|
15830 |
+
},
|
15831 |
+
{
|
15832 |
+
"epoch": 0.06604442298030448,
|
15833 |
+
"grad_norm": 2.739780902862549,
|
15834 |
+
"learning_rate": 2.967828247252217e-05,
|
15835 |
+
"loss": 1.9591,
|
15836 |
+
"step": 33915
|
15837 |
+
},
|
15838 |
+
{
|
15839 |
+
"epoch": 0.06607363325141476,
|
15840 |
+
"grad_norm": 3.8824591636657715,
|
15841 |
+
"learning_rate": 2.9677998852523277e-05,
|
15842 |
+
"loss": 1.8305,
|
15843 |
+
"step": 33930
|
15844 |
+
},
|
15845 |
+
{
|
15846 |
+
"epoch": 0.06610284352252502,
|
15847 |
+
"grad_norm": 3.137244939804077,
|
15848 |
+
"learning_rate": 2.967771510891889e-05,
|
15849 |
+
"loss": 1.9673,
|
15850 |
+
"step": 33945
|
15851 |
+
},
|
15852 |
+
{
|
15853 |
+
"epoch": 0.06613205379363528,
|
15854 |
+
"grad_norm": 2.5533084869384766,
|
15855 |
+
"learning_rate": 2.9677431241711405e-05,
|
15856 |
+
"loss": 1.7838,
|
15857 |
+
"step": 33960
|
15858 |
+
},
|
15859 |
+
{
|
15860 |
+
"epoch": 0.06616126406474554,
|
15861 |
+
"grad_norm": 3.2840499877929688,
|
15862 |
+
"learning_rate": 2.9677147250903203e-05,
|
15863 |
+
"loss": 1.7223,
|
15864 |
+
"step": 33975
|
15865 |
+
},
|
15866 |
+
{
|
15867 |
+
"epoch": 0.0661904743358558,
|
15868 |
+
"grad_norm": 3.5756568908691406,
|
15869 |
+
"learning_rate": 2.9676863136496685e-05,
|
15870 |
+
"loss": 2.0505,
|
15871 |
+
"step": 33990
|
15872 |
+
},
|
15873 |
+
{
|
15874 |
+
"epoch": 0.06621968460696606,
|
15875 |
+
"grad_norm": 2.9339518547058105,
|
15876 |
+
"learning_rate": 2.967657889849423e-05,
|
15877 |
+
"loss": 1.8248,
|
15878 |
+
"step": 34005
|
15879 |
+
},
|
15880 |
+
{
|
15881 |
+
"epoch": 0.06624889487807632,
|
15882 |
+
"grad_norm": 2.587409496307373,
|
15883 |
+
"learning_rate": 2.9676294536898247e-05,
|
15884 |
+
"loss": 2.1007,
|
15885 |
+
"step": 34020
|
15886 |
+
},
|
15887 |
+
{
|
15888 |
+
"epoch": 0.0662781051491866,
|
15889 |
+
"grad_norm": 2.164442777633667,
|
15890 |
+
"learning_rate": 2.9676010051711123e-05,
|
15891 |
+
"loss": 1.6494,
|
15892 |
+
"step": 34035
|
15893 |
+
},
|
15894 |
+
{
|
15895 |
+
"epoch": 0.06630731542029686,
|
15896 |
+
"grad_norm": 2.022341012954712,
|
15897 |
+
"learning_rate": 2.9675725442935252e-05,
|
15898 |
+
"loss": 2.01,
|
15899 |
+
"step": 34050
|
15900 |
+
},
|
15901 |
+
{
|
15902 |
+
"epoch": 0.06633652569140712,
|
15903 |
+
"grad_norm": 2.1963977813720703,
|
15904 |
+
"learning_rate": 2.9675440710573036e-05,
|
15905 |
+
"loss": 1.7275,
|
15906 |
+
"step": 34065
|
15907 |
+
},
|
15908 |
+
{
|
15909 |
+
"epoch": 0.06636573596251738,
|
15910 |
+
"grad_norm": 2.7345070838928223,
|
15911 |
+
"learning_rate": 2.967515585462687e-05,
|
15912 |
+
"loss": 1.6542,
|
15913 |
+
"step": 34080
|
15914 |
+
},
|
15915 |
+
{
|
15916 |
+
"epoch": 0.06639494623362764,
|
15917 |
+
"grad_norm": 2.9531948566436768,
|
15918 |
+
"learning_rate": 2.9674870875099144e-05,
|
15919 |
+
"loss": 1.9653,
|
15920 |
+
"step": 34095
|
15921 |
+
},
|
15922 |
+
{
|
15923 |
+
"epoch": 0.0664241565047379,
|
15924 |
+
"grad_norm": 2.839843988418579,
|
15925 |
+
"learning_rate": 2.9674585771992277e-05,
|
15926 |
+
"loss": 1.8574,
|
15927 |
+
"step": 34110
|
15928 |
+
},
|
15929 |
+
{
|
15930 |
+
"epoch": 0.06645336677584816,
|
15931 |
+
"grad_norm": 2.976874351501465,
|
15932 |
+
"learning_rate": 2.967430054530865e-05,
|
15933 |
+
"loss": 2.0622,
|
15934 |
+
"step": 34125
|
15935 |
+
},
|
15936 |
+
{
|
15937 |
+
"epoch": 0.06648257704695844,
|
15938 |
+
"grad_norm": 2.1137330532073975,
|
15939 |
+
"learning_rate": 2.967401519505068e-05,
|
15940 |
+
"loss": 1.8188,
|
15941 |
+
"step": 34140
|
15942 |
+
},
|
15943 |
+
{
|
15944 |
+
"epoch": 0.0665117873180687,
|
15945 |
+
"grad_norm": 2.7060563564300537,
|
15946 |
+
"learning_rate": 2.9673729721220765e-05,
|
15947 |
+
"loss": 1.9227,
|
15948 |
+
"step": 34155
|
15949 |
+
},
|
15950 |
+
{
|
15951 |
+
"epoch": 0.06654099758917896,
|
15952 |
+
"grad_norm": 2.3645975589752197,
|
15953 |
+
"learning_rate": 2.9673444123821306e-05,
|
15954 |
+
"loss": 1.8725,
|
15955 |
+
"step": 34170
|
15956 |
+
},
|
15957 |
+
{
|
15958 |
+
"epoch": 0.06657020786028922,
|
15959 |
+
"grad_norm": 2.382066011428833,
|
15960 |
+
"learning_rate": 2.967315840285471e-05,
|
15961 |
+
"loss": 1.7719,
|
15962 |
+
"step": 34185
|
15963 |
+
},
|
15964 |
+
{
|
15965 |
+
"epoch": 0.06659941813139948,
|
15966 |
+
"grad_norm": 1.7729606628417969,
|
15967 |
+
"learning_rate": 2.9672872558323385e-05,
|
15968 |
+
"loss": 1.8138,
|
15969 |
+
"step": 34200
|
15970 |
+
},
|
15971 |
+
{
|
15972 |
+
"epoch": 0.06662862840250974,
|
15973 |
+
"grad_norm": 2.7769312858581543,
|
15974 |
+
"learning_rate": 2.9672586590229735e-05,
|
15975 |
+
"loss": 1.8957,
|
15976 |
+
"step": 34215
|
15977 |
+
},
|
15978 |
+
{
|
15979 |
+
"epoch": 0.06665783867362,
|
15980 |
+
"grad_norm": 4.402248382568359,
|
15981 |
+
"learning_rate": 2.9672300498576173e-05,
|
15982 |
+
"loss": 1.9404,
|
15983 |
+
"step": 34230
|
15984 |
+
},
|
15985 |
+
{
|
15986 |
+
"epoch": 0.06668704894473028,
|
15987 |
+
"grad_norm": 4.229050159454346,
|
15988 |
+
"learning_rate": 2.96720142833651e-05,
|
15989 |
+
"loss": 1.8112,
|
15990 |
+
"step": 34245
|
15991 |
+
},
|
15992 |
+
{
|
15993 |
+
"epoch": 0.06671625921584054,
|
15994 |
+
"grad_norm": 4.353719711303711,
|
15995 |
+
"learning_rate": 2.9671727944598935e-05,
|
15996 |
+
"loss": 1.8216,
|
15997 |
+
"step": 34260
|
15998 |
+
},
|
15999 |
+
{
|
16000 |
+
"epoch": 0.0667454694869508,
|
16001 |
+
"grad_norm": 3.652221918106079,
|
16002 |
+
"learning_rate": 2.9671441482280083e-05,
|
16003 |
+
"loss": 1.9569,
|
16004 |
+
"step": 34275
|
16005 |
+
},
|
16006 |
+
{
|
16007 |
+
"epoch": 0.06677467975806106,
|
16008 |
+
"grad_norm": 2.7157819271087646,
|
16009 |
+
"learning_rate": 2.9671154896410962e-05,
|
16010 |
+
"loss": 1.7512,
|
16011 |
+
"step": 34290
|
16012 |
+
},
|
16013 |
+
{
|
16014 |
+
"epoch": 0.06680389002917132,
|
16015 |
+
"grad_norm": 2.719609498977661,
|
16016 |
+
"learning_rate": 2.9670868186993982e-05,
|
16017 |
+
"loss": 1.9252,
|
16018 |
+
"step": 34305
|
16019 |
+
},
|
16020 |
+
{
|
16021 |
+
"epoch": 0.06683310030028158,
|
16022 |
+
"grad_norm": 2.834977865219116,
|
16023 |
+
"learning_rate": 2.967058135403155e-05,
|
16024 |
+
"loss": 1.7615,
|
16025 |
+
"step": 34320
|
16026 |
+
},
|
16027 |
+
{
|
16028 |
+
"epoch": 0.06686231057139184,
|
16029 |
+
"grad_norm": 2.27976393699646,
|
16030 |
+
"learning_rate": 2.9670294397526097e-05,
|
16031 |
+
"loss": 1.7578,
|
16032 |
+
"step": 34335
|
16033 |
+
},
|
16034 |
+
{
|
16035 |
+
"epoch": 0.06689152084250212,
|
16036 |
+
"grad_norm": 1.7607027292251587,
|
16037 |
+
"learning_rate": 2.967000731748003e-05,
|
16038 |
+
"loss": 1.8367,
|
16039 |
+
"step": 34350
|
16040 |
+
},
|
16041 |
+
{
|
16042 |
+
"epoch": 0.06692073111361238,
|
16043 |
+
"grad_norm": 3.453352451324463,
|
16044 |
+
"learning_rate": 2.9669720113895763e-05,
|
16045 |
+
"loss": 1.8645,
|
16046 |
+
"step": 34365
|
16047 |
+
},
|
16048 |
+
{
|
16049 |
+
"epoch": 0.06694994138472264,
|
16050 |
+
"grad_norm": 2.4427194595336914,
|
16051 |
+
"learning_rate": 2.9669432786775727e-05,
|
16052 |
+
"loss": 1.8592,
|
16053 |
+
"step": 34380
|
16054 |
+
},
|
16055 |
+
{
|
16056 |
+
"epoch": 0.0669791516558329,
|
16057 |
+
"grad_norm": 3.492604970932007,
|
16058 |
+
"learning_rate": 2.9669145336122335e-05,
|
16059 |
+
"loss": 1.8394,
|
16060 |
+
"step": 34395
|
16061 |
+
},
|
16062 |
+
{
|
16063 |
+
"epoch": 0.06700836192694316,
|
16064 |
+
"grad_norm": 2.3918869495391846,
|
16065 |
+
"learning_rate": 2.9668857761938e-05,
|
16066 |
+
"loss": 1.7539,
|
16067 |
+
"step": 34410
|
16068 |
+
},
|
16069 |
+
{
|
16070 |
+
"epoch": 0.06703757219805342,
|
16071 |
+
"grad_norm": 1.826912760734558,
|
16072 |
+
"learning_rate": 2.9668570064225156e-05,
|
16073 |
+
"loss": 1.8747,
|
16074 |
+
"step": 34425
|
16075 |
+
},
|
16076 |
+
{
|
16077 |
+
"epoch": 0.06706678246916369,
|
16078 |
+
"grad_norm": 4.5764241218566895,
|
16079 |
+
"learning_rate": 2.966828224298622e-05,
|
16080 |
+
"loss": 1.8474,
|
16081 |
+
"step": 34440
|
16082 |
+
},
|
16083 |
+
{
|
16084 |
+
"epoch": 0.06709599274027396,
|
16085 |
+
"grad_norm": 3.9237124919891357,
|
16086 |
+
"learning_rate": 2.9667994298223612e-05,
|
16087 |
+
"loss": 1.8965,
|
16088 |
+
"step": 34455
|
16089 |
+
},
|
16090 |
+
{
|
16091 |
+
"epoch": 0.06712520301138422,
|
16092 |
+
"grad_norm": 3.064443826675415,
|
16093 |
+
"learning_rate": 2.9667706229939765e-05,
|
16094 |
+
"loss": 1.8549,
|
16095 |
+
"step": 34470
|
16096 |
+
},
|
16097 |
+
{
|
16098 |
+
"epoch": 0.06715441328249448,
|
16099 |
+
"grad_norm": 2.0611674785614014,
|
16100 |
+
"learning_rate": 2.96674180381371e-05,
|
16101 |
+
"loss": 1.8735,
|
16102 |
+
"step": 34485
|
16103 |
+
},
|
16104 |
+
{
|
16105 |
+
"epoch": 0.06718362355360474,
|
16106 |
+
"grad_norm": 3.887948513031006,
|
16107 |
+
"learning_rate": 2.9667129722818044e-05,
|
16108 |
+
"loss": 1.973,
|
16109 |
+
"step": 34500
|
16110 |
+
},
|
16111 |
+
{
|
16112 |
+
"epoch": 0.067212833824715,
|
16113 |
+
"grad_norm": 2.5983119010925293,
|
16114 |
+
"learning_rate": 2.966684128398503e-05,
|
16115 |
+
"loss": 1.8455,
|
16116 |
+
"step": 34515
|
16117 |
+
},
|
16118 |
+
{
|
16119 |
+
"epoch": 0.06724204409582527,
|
16120 |
+
"grad_norm": 3.6295909881591797,
|
16121 |
+
"learning_rate": 2.9666552721640474e-05,
|
16122 |
+
"loss": 1.738,
|
16123 |
+
"step": 34530
|
16124 |
+
},
|
16125 |
+
{
|
16126 |
+
"epoch": 0.06727125436693553,
|
16127 |
+
"grad_norm": 2.456125020980835,
|
16128 |
+
"learning_rate": 2.966626403578682e-05,
|
16129 |
+
"loss": 1.8651,
|
16130 |
+
"step": 34545
|
16131 |
+
},
|
16132 |
+
{
|
16133 |
+
"epoch": 0.06730046463804579,
|
16134 |
+
"grad_norm": 1.6152453422546387,
|
16135 |
+
"learning_rate": 2.966597522642649e-05,
|
16136 |
+
"loss": 1.8728,
|
16137 |
+
"step": 34560
|
16138 |
+
},
|
16139 |
+
{
|
16140 |
+
"epoch": 0.06732967490915606,
|
16141 |
+
"grad_norm": 2.789376974105835,
|
16142 |
+
"learning_rate": 2.966568629356193e-05,
|
16143 |
+
"loss": 1.8996,
|
16144 |
+
"step": 34575
|
16145 |
+
},
|
16146 |
+
{
|
16147 |
+
"epoch": 0.06735888518026632,
|
16148 |
+
"grad_norm": 3.9699041843414307,
|
16149 |
+
"learning_rate": 2.9665397237195555e-05,
|
16150 |
+
"loss": 1.7645,
|
16151 |
+
"step": 34590
|
16152 |
+
},
|
16153 |
+
{
|
16154 |
+
"epoch": 0.06738809545137658,
|
16155 |
+
"grad_norm": 2.7433955669403076,
|
16156 |
+
"learning_rate": 2.966510805732981e-05,
|
16157 |
+
"loss": 1.885,
|
16158 |
+
"step": 34605
|
16159 |
+
},
|
16160 |
+
{
|
16161 |
+
"epoch": 0.06741730572248684,
|
16162 |
+
"grad_norm": 3.254692316055298,
|
16163 |
+
"learning_rate": 2.9664818753967123e-05,
|
16164 |
+
"loss": 1.7664,
|
16165 |
+
"step": 34620
|
16166 |
+
},
|
16167 |
+
{
|
16168 |
+
"epoch": 0.0674465159935971,
|
16169 |
+
"grad_norm": 2.647752285003662,
|
16170 |
+
"learning_rate": 2.966452932710994e-05,
|
16171 |
+
"loss": 1.8037,
|
16172 |
+
"step": 34635
|
16173 |
+
},
|
16174 |
+
{
|
16175 |
+
"epoch": 0.06747572626470737,
|
16176 |
+
"grad_norm": 3.770737648010254,
|
16177 |
+
"learning_rate": 2.966423977676069e-05,
|
16178 |
+
"loss": 1.7053,
|
16179 |
+
"step": 34650
|
16180 |
+
},
|
16181 |
+
{
|
16182 |
+
"epoch": 0.06750493653581763,
|
16183 |
+
"grad_norm": 2.101461410522461,
|
16184 |
+
"learning_rate": 2.9663950102921814e-05,
|
16185 |
+
"loss": 1.8039,
|
16186 |
+
"step": 34665
|
16187 |
+
},
|
16188 |
+
{
|
16189 |
+
"epoch": 0.0675341468069279,
|
16190 |
+
"grad_norm": 4.246973514556885,
|
16191 |
+
"learning_rate": 2.9663660305595754e-05,
|
16192 |
+
"loss": 1.776,
|
16193 |
+
"step": 34680
|
16194 |
+
},
|
16195 |
+
{
|
16196 |
+
"epoch": 0.06756335707803816,
|
16197 |
+
"grad_norm": 6.2981157302856445,
|
16198 |
+
"learning_rate": 2.9663370384784946e-05,
|
16199 |
+
"loss": 1.7988,
|
16200 |
+
"step": 34695
|
16201 |
+
},
|
16202 |
+
{
|
16203 |
+
"epoch": 0.06759256734914842,
|
16204 |
+
"grad_norm": 2.8231287002563477,
|
16205 |
+
"learning_rate": 2.9663080340491838e-05,
|
16206 |
+
"loss": 1.9157,
|
16207 |
+
"step": 34710
|
16208 |
+
},
|
16209 |
+
{
|
16210 |
+
"epoch": 0.06762177762025869,
|
16211 |
+
"grad_norm": 2.1489720344543457,
|
16212 |
+
"learning_rate": 2.9662790172718867e-05,
|
16213 |
+
"loss": 1.7745,
|
16214 |
+
"step": 34725
|
16215 |
+
},
|
16216 |
+
{
|
16217 |
+
"epoch": 0.06765098789136895,
|
16218 |
+
"grad_norm": 2.714576482772827,
|
16219 |
+
"learning_rate": 2.9662499881468475e-05,
|
16220 |
+
"loss": 1.8596,
|
16221 |
+
"step": 34740
|
16222 |
+
},
|
16223 |
+
{
|
16224 |
+
"epoch": 0.06768019816247921,
|
16225 |
+
"grad_norm": 2.1026928424835205,
|
16226 |
+
"learning_rate": 2.966220946674311e-05,
|
16227 |
+
"loss": 1.9096,
|
16228 |
+
"step": 34755
|
16229 |
+
},
|
16230 |
+
{
|
16231 |
+
"epoch": 0.06770940843358947,
|
16232 |
+
"grad_norm": 4.316662311553955,
|
16233 |
+
"learning_rate": 2.9661918928545215e-05,
|
16234 |
+
"loss": 1.8735,
|
16235 |
+
"step": 34770
|
16236 |
+
},
|
16237 |
+
{
|
16238 |
+
"epoch": 0.06773861870469974,
|
16239 |
+
"grad_norm": 3.814514636993408,
|
16240 |
+
"learning_rate": 2.966162826687724e-05,
|
16241 |
+
"loss": 1.9701,
|
16242 |
+
"step": 34785
|
16243 |
+
},
|
16244 |
+
{
|
16245 |
+
"epoch": 0.06776782897581,
|
16246 |
+
"grad_norm": 4.129803657531738,
|
16247 |
+
"learning_rate": 2.966133748174164e-05,
|
16248 |
+
"loss": 1.9273,
|
16249 |
+
"step": 34800
|
16250 |
+
},
|
16251 |
+
{
|
16252 |
+
"epoch": 0.06779703924692027,
|
16253 |
+
"grad_norm": 1.9344481229782104,
|
16254 |
+
"learning_rate": 2.966104657314084e-05,
|
16255 |
+
"loss": 1.8738,
|
16256 |
+
"step": 34815
|
16257 |
+
},
|
16258 |
+
{
|
16259 |
+
"epoch": 0.06782624951803053,
|
16260 |
+
"grad_norm": 5.156272888183594,
|
16261 |
+
"learning_rate": 2.9660755541077314e-05,
|
16262 |
+
"loss": 1.9722,
|
16263 |
+
"step": 34830
|
16264 |
+
},
|
16265 |
+
{
|
16266 |
+
"epoch": 0.06785545978914079,
|
16267 |
+
"grad_norm": 2.4508838653564453,
|
16268 |
+
"learning_rate": 2.9660464385553504e-05,
|
16269 |
+
"loss": 1.8354,
|
16270 |
+
"step": 34845
|
16271 |
+
},
|
16272 |
+
{
|
16273 |
+
"epoch": 0.06788467006025105,
|
16274 |
+
"grad_norm": 4.646978855133057,
|
16275 |
+
"learning_rate": 2.9660173106571856e-05,
|
16276 |
+
"loss": 1.6924,
|
16277 |
+
"step": 34860
|
16278 |
+
},
|
16279 |
+
{
|
16280 |
+
"epoch": 0.06791388033136131,
|
16281 |
+
"grad_norm": 4.116873741149902,
|
16282 |
+
"learning_rate": 2.9659881704134835e-05,
|
16283 |
+
"loss": 1.9238,
|
16284 |
+
"step": 34875
|
16285 |
+
},
|
16286 |
+
{
|
16287 |
+
"epoch": 0.06794309060247158,
|
16288 |
+
"grad_norm": 2.2530601024627686,
|
16289 |
+
"learning_rate": 2.965959017824488e-05,
|
16290 |
+
"loss": 1.744,
|
16291 |
+
"step": 34890
|
16292 |
+
},
|
16293 |
+
{
|
16294 |
+
"epoch": 0.06797230087358185,
|
16295 |
+
"grad_norm": 3.1118083000183105,
|
16296 |
+
"learning_rate": 2.9659298528904462e-05,
|
16297 |
+
"loss": 1.7291,
|
16298 |
+
"step": 34905
|
16299 |
+
},
|
16300 |
+
{
|
16301 |
+
"epoch": 0.0680015111446921,
|
16302 |
+
"grad_norm": 4.10203218460083,
|
16303 |
+
"learning_rate": 2.9659006756116024e-05,
|
16304 |
+
"loss": 1.9676,
|
16305 |
+
"step": 34920
|
16306 |
+
},
|
16307 |
+
{
|
16308 |
+
"epoch": 0.06803072141580237,
|
16309 |
+
"grad_norm": 2.487515926361084,
|
16310 |
+
"learning_rate": 2.9658714859882033e-05,
|
16311 |
+
"loss": 1.755,
|
16312 |
+
"step": 34935
|
16313 |
+
},
|
16314 |
+
{
|
16315 |
+
"epoch": 0.06805993168691263,
|
16316 |
+
"grad_norm": 4.722667694091797,
|
16317 |
+
"learning_rate": 2.965842284020494e-05,
|
16318 |
+
"loss": 1.6132,
|
16319 |
+
"step": 34950
|
16320 |
+
},
|
16321 |
+
{
|
16322 |
+
"epoch": 0.06808914195802289,
|
16323 |
+
"grad_norm": 3.8074474334716797,
|
16324 |
+
"learning_rate": 2.9658130697087206e-05,
|
16325 |
+
"loss": 1.8949,
|
16326 |
+
"step": 34965
|
16327 |
+
},
|
16328 |
+
{
|
16329 |
+
"epoch": 0.06811835222913315,
|
16330 |
+
"grad_norm": 3.2412679195404053,
|
16331 |
+
"learning_rate": 2.9657838430531295e-05,
|
16332 |
+
"loss": 1.8062,
|
16333 |
+
"step": 34980
|
16334 |
+
},
|
16335 |
+
{
|
16336 |
+
"epoch": 0.06814756250024342,
|
16337 |
+
"grad_norm": 3.2179923057556152,
|
16338 |
+
"learning_rate": 2.9657546040539662e-05,
|
16339 |
+
"loss": 1.7753,
|
16340 |
+
"step": 34995
|
16341 |
}
|
16342 |
],
|
16343 |
"logging_steps": 15,
...
      "attributes": {}
    }
  },
+  "total_flos": 5.536910190624768e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
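
The entries above follow the `log_history` schema that `transformers.Trainer` writes to `trainer_state.json`: one record per `logging_steps` interval, each carrying `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`. A minimal sketch for inspecting the uploaded log, assuming the repo has been downloaded locally and that `matplotlib` is installed (neither is provided by this commit):

```python
import json

import matplotlib.pyplot as plt  # assumption: matplotlib is available locally

# Load the state file saved by transformers.Trainer (path assumes a local clone).
with open("data/trainer_state.json") as f:
    state = json.load(f)

# Keep only training-loss records (eval records, if any, lack the "loss" key).
history = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

# Plot loss and learning rate against the optimizer step.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(steps, losses)
ax1.set_ylabel("training loss")
ax2.plot(steps, lrs)
ax2.set_ylabel("learning rate")
ax2.set_xlabel("step")
plt.tight_layout()
plt.show()
```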
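Two quick sanity checks on the run-level fields, with values hard-coded from the diff above; the inference about the schedule shape is an assumption on my part, not something the file states:

```python
# Learning-rate drop between two consecutive logged steps (32820 -> 32835).
lr_a, lr_b = 2.9698652713993947e-05, 2.9698378123579603e-05
logging_steps = 15  # matches "logging_steps": 15 above

per_step = (lr_a - lr_b) / logging_steps
print(f"LR decrement per step: {per_step:.3e}")
# ~1.83e-11 per step; the decrement drifts slightly across entries, so this
# looks like a smooth decay (e.g. cosine) rather than a strictly linear one
# -- an assumption, since the schedule type is not recorded in this file.

# "total_flos" is the cumulative floating-point operation count tracked by the Trainer.
total_flos = 5.536910190624768e+16
print(f"total_flos ~= {total_flos / 1e15:.1f} PFLOPs")  # ~55.4 PFLOPs so far
```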