Training in progress, step 3000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b0a789e71e02f4f859c128d91086b86c69bb7ae9f01c97e4f195af72afff004
|
3 |
size 50624
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 111142
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fb78d46fffae00a13d0526987c7b047340e8e343db76d12fc0a5600e3b3b861
|
3 |
size 111142
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c53a9630e6978c05d301e5d7e5200b5d2c8eea7ef124884409824d8ad349fc02
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b7a58285d5e0bbfbdededdbf4101ad8a6d7c67b1022483e5d546b1b401edbd2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 11.642865180969238,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2800",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20547,6 +20547,714 @@
|
|
20547 |
"eval_samples_per_second": 123.96,
|
20548 |
"eval_steps_per_second": 30.99,
|
20549 |
"step": 2900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20550 |
}
|
20551 |
],
|
20552 |
"logging_steps": 1,
|
@@ -20561,7 +21269,7 @@
|
|
20561 |
"early_stopping_threshold": 0.0
|
20562 |
},
|
20563 |
"attributes": {
|
20564 |
-
"early_stopping_patience_counter":
|
20565 |
}
|
20566 |
},
|
20567 |
"TrainerControl": {
|
@@ -20570,12 +21278,12 @@
|
|
20570 |
"should_evaluate": false,
|
20571 |
"should_log": false,
|
20572 |
"should_save": true,
|
20573 |
-
"should_training_stop":
|
20574 |
},
|
20575 |
"attributes": {}
|
20576 |
}
|
20577 |
},
|
20578 |
-
"total_flos":
|
20579 |
"train_batch_size": 4,
|
20580 |
"trial_name": null,
|
20581 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 11.642865180969238,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2800",
|
4 |
+
"epoch": 0.8651454525792149,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 3000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20547 |
"eval_samples_per_second": 123.96,
|
20548 |
"eval_steps_per_second": 30.99,
|
20549 |
"step": 2900
|
20550 |
+
},
|
20551 |
+
{
|
20552 |
+
"epoch": 0.8365956526441007,
|
20553 |
+
"grad_norm": 0.18284933269023895,
|
20554 |
+
"learning_rate": 4.48232259229554e-05,
|
20555 |
+
"loss": 11.6659,
|
20556 |
+
"step": 2901
|
20557 |
+
},
|
20558 |
+
{
|
20559 |
+
"epoch": 0.8368840344616272,
|
20560 |
+
"grad_norm": 0.13595181703567505,
|
20561 |
+
"learning_rate": 4.476106562329155e-05,
|
20562 |
+
"loss": 11.6795,
|
20563 |
+
"step": 2902
|
20564 |
+
},
|
20565 |
+
{
|
20566 |
+
"epoch": 0.8371724162791536,
|
20567 |
+
"grad_norm": 0.1296217292547226,
|
20568 |
+
"learning_rate": 4.46989360248704e-05,
|
20569 |
+
"loss": 11.6749,
|
20570 |
+
"step": 2903
|
20571 |
+
},
|
20572 |
+
{
|
20573 |
+
"epoch": 0.8374607980966801,
|
20574 |
+
"grad_norm": 0.18848179280757904,
|
20575 |
+
"learning_rate": 4.4636837162222965e-05,
|
20576 |
+
"loss": 11.6528,
|
20577 |
+
"step": 2904
|
20578 |
+
},
|
20579 |
+
{
|
20580 |
+
"epoch": 0.8377491799142064,
|
20581 |
+
"grad_norm": 0.16293784976005554,
|
20582 |
+
"learning_rate": 4.4574769069863075e-05,
|
20583 |
+
"loss": 11.6722,
|
20584 |
+
"step": 2905
|
20585 |
+
},
|
20586 |
+
{
|
20587 |
+
"epoch": 0.8380375617317328,
|
20588 |
+
"grad_norm": 0.18424579501152039,
|
20589 |
+
"learning_rate": 4.451273178228764e-05,
|
20590 |
+
"loss": 11.6811,
|
20591 |
+
"step": 2906
|
20592 |
+
},
|
20593 |
+
{
|
20594 |
+
"epoch": 0.8383259435492593,
|
20595 |
+
"grad_norm": 0.17344294488430023,
|
20596 |
+
"learning_rate": 4.445072533397635e-05,
|
20597 |
+
"loss": 11.6816,
|
20598 |
+
"step": 2907
|
20599 |
+
},
|
20600 |
+
{
|
20601 |
+
"epoch": 0.8386143253667856,
|
20602 |
+
"grad_norm": 0.13144327700138092,
|
20603 |
+
"learning_rate": 4.4388749759391754e-05,
|
20604 |
+
"loss": 11.6759,
|
20605 |
+
"step": 2908
|
20606 |
+
},
|
20607 |
+
{
|
20608 |
+
"epoch": 0.838902707184312,
|
20609 |
+
"grad_norm": 0.18985804915428162,
|
20610 |
+
"learning_rate": 4.432680509297924e-05,
|
20611 |
+
"loss": 11.6346,
|
20612 |
+
"step": 2909
|
20613 |
+
},
|
20614 |
+
{
|
20615 |
+
"epoch": 0.8391910890018385,
|
20616 |
+
"grad_norm": 0.16098152101039886,
|
20617 |
+
"learning_rate": 4.426489136916704e-05,
|
20618 |
+
"loss": 11.6683,
|
20619 |
+
"step": 2910
|
20620 |
+
},
|
20621 |
+
{
|
20622 |
+
"epoch": 0.8394794708193648,
|
20623 |
+
"grad_norm": 0.1585586965084076,
|
20624 |
+
"learning_rate": 4.420300862236617e-05,
|
20625 |
+
"loss": 11.6843,
|
20626 |
+
"step": 2911
|
20627 |
+
},
|
20628 |
+
{
|
20629 |
+
"epoch": 0.8397678526368912,
|
20630 |
+
"grad_norm": 0.25072965025901794,
|
20631 |
+
"learning_rate": 4.4141156886970445e-05,
|
20632 |
+
"loss": 11.6972,
|
20633 |
+
"step": 2912
|
20634 |
+
},
|
20635 |
+
{
|
20636 |
+
"epoch": 0.8400562344544177,
|
20637 |
+
"grad_norm": 0.1607411801815033,
|
20638 |
+
"learning_rate": 4.4079336197356436e-05,
|
20639 |
+
"loss": 11.6893,
|
20640 |
+
"step": 2913
|
20641 |
+
},
|
20642 |
+
{
|
20643 |
+
"epoch": 0.840344616271944,
|
20644 |
+
"grad_norm": 0.16909179091453552,
|
20645 |
+
"learning_rate": 4.401754658788344e-05,
|
20646 |
+
"loss": 11.683,
|
20647 |
+
"step": 2914
|
20648 |
+
},
|
20649 |
+
{
|
20650 |
+
"epoch": 0.8406329980894705,
|
20651 |
+
"grad_norm": 0.14870120584964752,
|
20652 |
+
"learning_rate": 4.395578809289349e-05,
|
20653 |
+
"loss": 11.7093,
|
20654 |
+
"step": 2915
|
20655 |
+
},
|
20656 |
+
{
|
20657 |
+
"epoch": 0.8409213799069969,
|
20658 |
+
"grad_norm": 0.2617393136024475,
|
20659 |
+
"learning_rate": 4.389406074671143e-05,
|
20660 |
+
"loss": 11.6255,
|
20661 |
+
"step": 2916
|
20662 |
+
},
|
20663 |
+
{
|
20664 |
+
"epoch": 0.8412097617245232,
|
20665 |
+
"grad_norm": 0.12288763374090195,
|
20666 |
+
"learning_rate": 4.383236458364455e-05,
|
20667 |
+
"loss": 11.6889,
|
20668 |
+
"step": 2917
|
20669 |
+
},
|
20670 |
+
{
|
20671 |
+
"epoch": 0.8414981435420497,
|
20672 |
+
"grad_norm": 0.17696166038513184,
|
20673 |
+
"learning_rate": 4.377069963798309e-05,
|
20674 |
+
"loss": 11.6541,
|
20675 |
+
"step": 2918
|
20676 |
+
},
|
20677 |
+
{
|
20678 |
+
"epoch": 0.8417865253595761,
|
20679 |
+
"grad_norm": 0.165409654378891,
|
20680 |
+
"learning_rate": 4.3709065943999696e-05,
|
20681 |
+
"loss": 11.6684,
|
20682 |
+
"step": 2919
|
20683 |
+
},
|
20684 |
+
{
|
20685 |
+
"epoch": 0.8420749071771025,
|
20686 |
+
"grad_norm": 0.11674615740776062,
|
20687 |
+
"learning_rate": 4.364746353594985e-05,
|
20688 |
+
"loss": 11.6983,
|
20689 |
+
"step": 2920
|
20690 |
+
},
|
20691 |
+
{
|
20692 |
+
"epoch": 0.8423632889946289,
|
20693 |
+
"grad_norm": 0.16131404042243958,
|
20694 |
+
"learning_rate": 4.358589244807144e-05,
|
20695 |
+
"loss": 11.6456,
|
20696 |
+
"step": 2921
|
20697 |
+
},
|
20698 |
+
{
|
20699 |
+
"epoch": 0.8426516708121553,
|
20700 |
+
"grad_norm": 0.15852144360542297,
|
20701 |
+
"learning_rate": 4.352435271458516e-05,
|
20702 |
+
"loss": 11.6479,
|
20703 |
+
"step": 2922
|
20704 |
+
},
|
20705 |
+
{
|
20706 |
+
"epoch": 0.8429400526296817,
|
20707 |
+
"grad_norm": 0.1303740292787552,
|
20708 |
+
"learning_rate": 4.3462844369694124e-05,
|
20709 |
+
"loss": 11.6934,
|
20710 |
+
"step": 2923
|
20711 |
+
},
|
20712 |
+
{
|
20713 |
+
"epoch": 0.8432284344472081,
|
20714 |
+
"grad_norm": 0.08299141377210617,
|
20715 |
+
"learning_rate": 4.3401367447584065e-05,
|
20716 |
+
"loss": 11.7121,
|
20717 |
+
"step": 2924
|
20718 |
+
},
|
20719 |
+
{
|
20720 |
+
"epoch": 0.8435168162647345,
|
20721 |
+
"grad_norm": 0.13092194497585297,
|
20722 |
+
"learning_rate": 4.3339921982423214e-05,
|
20723 |
+
"loss": 11.6822,
|
20724 |
+
"step": 2925
|
20725 |
+
},
|
20726 |
+
{
|
20727 |
+
"epoch": 0.843805198082261,
|
20728 |
+
"grad_norm": 0.17801184952259064,
|
20729 |
+
"learning_rate": 4.327850800836236e-05,
|
20730 |
+
"loss": 11.6495,
|
20731 |
+
"step": 2926
|
20732 |
+
},
|
20733 |
+
{
|
20734 |
+
"epoch": 0.8440935798997873,
|
20735 |
+
"grad_norm": 0.12021403759717941,
|
20736 |
+
"learning_rate": 4.3217125559534764e-05,
|
20737 |
+
"loss": 11.7029,
|
20738 |
+
"step": 2927
|
20739 |
+
},
|
20740 |
+
{
|
20741 |
+
"epoch": 0.8443819617173137,
|
20742 |
+
"grad_norm": 0.12242776900529861,
|
20743 |
+
"learning_rate": 4.315577467005618e-05,
|
20744 |
+
"loss": 11.6713,
|
20745 |
+
"step": 2928
|
20746 |
+
},
|
20747 |
+
{
|
20748 |
+
"epoch": 0.8446703435348402,
|
20749 |
+
"grad_norm": 0.11881870031356812,
|
20750 |
+
"learning_rate": 4.30944553740248e-05,
|
20751 |
+
"loss": 11.6996,
|
20752 |
+
"step": 2929
|
20753 |
+
},
|
20754 |
+
{
|
20755 |
+
"epoch": 0.8449587253523665,
|
20756 |
+
"grad_norm": 0.11722824722528458,
|
20757 |
+
"learning_rate": 4.3033167705521284e-05,
|
20758 |
+
"loss": 11.6695,
|
20759 |
+
"step": 2930
|
20760 |
+
},
|
20761 |
+
{
|
20762 |
+
"epoch": 0.8452471071698929,
|
20763 |
+
"grad_norm": 0.16650351881980896,
|
20764 |
+
"learning_rate": 4.297191169860865e-05,
|
20765 |
+
"loss": 11.6721,
|
20766 |
+
"step": 2931
|
20767 |
+
},
|
20768 |
+
{
|
20769 |
+
"epoch": 0.8455354889874194,
|
20770 |
+
"grad_norm": 0.21880216896533966,
|
20771 |
+
"learning_rate": 4.291068738733248e-05,
|
20772 |
+
"loss": 11.6653,
|
20773 |
+
"step": 2932
|
20774 |
+
},
|
20775 |
+
{
|
20776 |
+
"epoch": 0.8458238708049457,
|
20777 |
+
"grad_norm": 0.16557948291301727,
|
20778 |
+
"learning_rate": 4.28494948057205e-05,
|
20779 |
+
"loss": 11.6768,
|
20780 |
+
"step": 2933
|
20781 |
+
},
|
20782 |
+
{
|
20783 |
+
"epoch": 0.8461122526224721,
|
20784 |
+
"grad_norm": 0.17281831800937653,
|
20785 |
+
"learning_rate": 4.278833398778306e-05,
|
20786 |
+
"loss": 11.6481,
|
20787 |
+
"step": 2934
|
20788 |
+
},
|
20789 |
+
{
|
20790 |
+
"epoch": 0.8464006344399986,
|
20791 |
+
"grad_norm": 0.232573002576828,
|
20792 |
+
"learning_rate": 4.272720496751258e-05,
|
20793 |
+
"loss": 11.627,
|
20794 |
+
"step": 2935
|
20795 |
+
},
|
20796 |
+
{
|
20797 |
+
"epoch": 0.846689016257525,
|
20798 |
+
"grad_norm": 0.21352550387382507,
|
20799 |
+
"learning_rate": 4.2666107778884065e-05,
|
20800 |
+
"loss": 11.6415,
|
20801 |
+
"step": 2936
|
20802 |
+
},
|
20803 |
+
{
|
20804 |
+
"epoch": 0.8469773980750513,
|
20805 |
+
"grad_norm": 0.16628918051719666,
|
20806 |
+
"learning_rate": 4.260504245585469e-05,
|
20807 |
+
"loss": 11.6933,
|
20808 |
+
"step": 2937
|
20809 |
+
},
|
20810 |
+
{
|
20811 |
+
"epoch": 0.8472657798925778,
|
20812 |
+
"grad_norm": 0.1400359719991684,
|
20813 |
+
"learning_rate": 4.254400903236394e-05,
|
20814 |
+
"loss": 11.6807,
|
20815 |
+
"step": 2938
|
20816 |
+
},
|
20817 |
+
{
|
20818 |
+
"epoch": 0.8475541617101042,
|
20819 |
+
"grad_norm": 0.1728946715593338,
|
20820 |
+
"learning_rate": 4.24830075423336e-05,
|
20821 |
+
"loss": 11.6956,
|
20822 |
+
"step": 2939
|
20823 |
+
},
|
20824 |
+
{
|
20825 |
+
"epoch": 0.8478425435276306,
|
20826 |
+
"grad_norm": 0.17397397756576538,
|
20827 |
+
"learning_rate": 4.242203801966759e-05,
|
20828 |
+
"loss": 11.6632,
|
20829 |
+
"step": 2940
|
20830 |
+
},
|
20831 |
+
{
|
20832 |
+
"epoch": 0.848130925345157,
|
20833 |
+
"grad_norm": 0.20304632186889648,
|
20834 |
+
"learning_rate": 4.236110049825228e-05,
|
20835 |
+
"loss": 11.6562,
|
20836 |
+
"step": 2941
|
20837 |
+
},
|
20838 |
+
{
|
20839 |
+
"epoch": 0.8484193071626834,
|
20840 |
+
"grad_norm": 0.15627586841583252,
|
20841 |
+
"learning_rate": 4.230019501195601e-05,
|
20842 |
+
"loss": 11.6637,
|
20843 |
+
"step": 2942
|
20844 |
+
},
|
20845 |
+
{
|
20846 |
+
"epoch": 0.8487076889802098,
|
20847 |
+
"grad_norm": 0.14984051883220673,
|
20848 |
+
"learning_rate": 4.223932159462954e-05,
|
20849 |
+
"loss": 11.6232,
|
20850 |
+
"step": 2943
|
20851 |
+
},
|
20852 |
+
{
|
20853 |
+
"epoch": 0.8489960707977362,
|
20854 |
+
"grad_norm": 0.1769644021987915,
|
20855 |
+
"learning_rate": 4.2178480280105645e-05,
|
20856 |
+
"loss": 11.6534,
|
20857 |
+
"step": 2944
|
20858 |
+
},
|
20859 |
+
{
|
20860 |
+
"epoch": 0.8492844526152626,
|
20861 |
+
"grad_norm": 0.12905630469322205,
|
20862 |
+
"learning_rate": 4.211767110219934e-05,
|
20863 |
+
"loss": 11.7013,
|
20864 |
+
"step": 2945
|
20865 |
+
},
|
20866 |
+
{
|
20867 |
+
"epoch": 0.849572834432789,
|
20868 |
+
"grad_norm": 0.15312866866588593,
|
20869 |
+
"learning_rate": 4.2056894094707734e-05,
|
20870 |
+
"loss": 11.6644,
|
20871 |
+
"step": 2946
|
20872 |
+
},
|
20873 |
+
{
|
20874 |
+
"epoch": 0.8498612162503154,
|
20875 |
+
"grad_norm": 0.1332063376903534,
|
20876 |
+
"learning_rate": 4.199614929141008e-05,
|
20877 |
+
"loss": 11.6627,
|
20878 |
+
"step": 2947
|
20879 |
+
},
|
20880 |
+
{
|
20881 |
+
"epoch": 0.8501495980678418,
|
20882 |
+
"grad_norm": 0.18677252531051636,
|
20883 |
+
"learning_rate": 4.1935436726067735e-05,
|
20884 |
+
"loss": 11.6428,
|
20885 |
+
"step": 2948
|
20886 |
+
},
|
20887 |
+
{
|
20888 |
+
"epoch": 0.8504379798853682,
|
20889 |
+
"grad_norm": 0.15126672387123108,
|
20890 |
+
"learning_rate": 4.1874756432424123e-05,
|
20891 |
+
"loss": 11.6711,
|
20892 |
+
"step": 2949
|
20893 |
+
},
|
20894 |
+
{
|
20895 |
+
"epoch": 0.8507263617028946,
|
20896 |
+
"grad_norm": 0.17418427765369415,
|
20897 |
+
"learning_rate": 4.181410844420474e-05,
|
20898 |
+
"loss": 11.6944,
|
20899 |
+
"step": 2950
|
20900 |
+
},
|
20901 |
+
{
|
20902 |
+
"epoch": 0.8510147435204211,
|
20903 |
+
"grad_norm": 0.19993005692958832,
|
20904 |
+
"learning_rate": 4.175349279511712e-05,
|
20905 |
+
"loss": 11.6651,
|
20906 |
+
"step": 2951
|
20907 |
+
},
|
20908 |
+
{
|
20909 |
+
"epoch": 0.8513031253379475,
|
20910 |
+
"grad_norm": 0.18031910061836243,
|
20911 |
+
"learning_rate": 4.169290951885081e-05,
|
20912 |
+
"loss": 11.6374,
|
20913 |
+
"step": 2952
|
20914 |
+
},
|
20915 |
+
{
|
20916 |
+
"epoch": 0.8515915071554738,
|
20917 |
+
"grad_norm": 0.1512380689382553,
|
20918 |
+
"learning_rate": 4.1632358649077475e-05,
|
20919 |
+
"loss": 11.6633,
|
20920 |
+
"step": 2953
|
20921 |
+
},
|
20922 |
+
{
|
20923 |
+
"epoch": 0.8518798889730003,
|
20924 |
+
"grad_norm": 0.1253197342157364,
|
20925 |
+
"learning_rate": 4.157184021945054e-05,
|
20926 |
+
"loss": 11.6864,
|
20927 |
+
"step": 2954
|
20928 |
+
},
|
20929 |
+
{
|
20930 |
+
"epoch": 0.8521682707905267,
|
20931 |
+
"grad_norm": 0.1121377944946289,
|
20932 |
+
"learning_rate": 4.1511354263605684e-05,
|
20933 |
+
"loss": 11.6855,
|
20934 |
+
"step": 2955
|
20935 |
+
},
|
20936 |
+
{
|
20937 |
+
"epoch": 0.852456652608053,
|
20938 |
+
"grad_norm": 0.23518823087215424,
|
20939 |
+
"learning_rate": 4.145090081516024e-05,
|
20940 |
+
"loss": 11.668,
|
20941 |
+
"step": 2956
|
20942 |
+
},
|
20943 |
+
{
|
20944 |
+
"epoch": 0.8527450344255795,
|
20945 |
+
"grad_norm": 0.15132634341716766,
|
20946 |
+
"learning_rate": 4.139047990771378e-05,
|
20947 |
+
"loss": 11.6768,
|
20948 |
+
"step": 2957
|
20949 |
+
},
|
20950 |
+
{
|
20951 |
+
"epoch": 0.8530334162431059,
|
20952 |
+
"grad_norm": 0.11952768266201019,
|
20953 |
+
"learning_rate": 4.1330091574847496e-05,
|
20954 |
+
"loss": 11.7125,
|
20955 |
+
"step": 2958
|
20956 |
+
},
|
20957 |
+
{
|
20958 |
+
"epoch": 0.8533217980606322,
|
20959 |
+
"grad_norm": 0.11574079841375351,
|
20960 |
+
"learning_rate": 4.1269735850124704e-05,
|
20961 |
+
"loss": 11.7085,
|
20962 |
+
"step": 2959
|
20963 |
+
},
|
20964 |
+
{
|
20965 |
+
"epoch": 0.8536101798781587,
|
20966 |
+
"grad_norm": 0.1615985929965973,
|
20967 |
+
"learning_rate": 4.1209412767090484e-05,
|
20968 |
+
"loss": 11.6949,
|
20969 |
+
"step": 2960
|
20970 |
+
},
|
20971 |
+
{
|
20972 |
+
"epoch": 0.8538985616956851,
|
20973 |
+
"grad_norm": 0.15752598643302917,
|
20974 |
+
"learning_rate": 4.11491223592718e-05,
|
20975 |
+
"loss": 11.6571,
|
20976 |
+
"step": 2961
|
20977 |
+
},
|
20978 |
+
{
|
20979 |
+
"epoch": 0.8541869435132114,
|
20980 |
+
"grad_norm": 0.2088017612695694,
|
20981 |
+
"learning_rate": 4.1088864660177425e-05,
|
20982 |
+
"loss": 11.6527,
|
20983 |
+
"step": 2962
|
20984 |
+
},
|
20985 |
+
{
|
20986 |
+
"epoch": 0.8544753253307379,
|
20987 |
+
"grad_norm": 0.12710094451904297,
|
20988 |
+
"learning_rate": 4.1028639703298025e-05,
|
20989 |
+
"loss": 11.6452,
|
20990 |
+
"step": 2963
|
20991 |
+
},
|
20992 |
+
{
|
20993 |
+
"epoch": 0.8547637071482643,
|
20994 |
+
"grad_norm": 0.15736031532287598,
|
20995 |
+
"learning_rate": 4.096844752210598e-05,
|
20996 |
+
"loss": 11.6585,
|
20997 |
+
"step": 2964
|
20998 |
+
},
|
20999 |
+
{
|
21000 |
+
"epoch": 0.8550520889657907,
|
21001 |
+
"grad_norm": 0.21575695276260376,
|
21002 |
+
"learning_rate": 4.090828815005553e-05,
|
21003 |
+
"loss": 11.6362,
|
21004 |
+
"step": 2965
|
21005 |
+
},
|
21006 |
+
{
|
21007 |
+
"epoch": 0.8553404707833171,
|
21008 |
+
"grad_norm": 0.15497878193855286,
|
21009 |
+
"learning_rate": 4.0848161620582625e-05,
|
21010 |
+
"loss": 11.7088,
|
21011 |
+
"step": 2966
|
21012 |
+
},
|
21013 |
+
{
|
21014 |
+
"epoch": 0.8556288526008435,
|
21015 |
+
"grad_norm": 0.17160211503505707,
|
21016 |
+
"learning_rate": 4.078806796710499e-05,
|
21017 |
+
"loss": 11.6504,
|
21018 |
+
"step": 2967
|
21019 |
+
},
|
21020 |
+
{
|
21021 |
+
"epoch": 0.85591723441837,
|
21022 |
+
"grad_norm": 0.19605527818202972,
|
21023 |
+
"learning_rate": 4.072800722302206e-05,
|
21024 |
+
"loss": 11.6268,
|
21025 |
+
"step": 2968
|
21026 |
+
},
|
21027 |
+
{
|
21028 |
+
"epoch": 0.8562056162358963,
|
21029 |
+
"grad_norm": 0.137605220079422,
|
21030 |
+
"learning_rate": 4.0667979421715065e-05,
|
21031 |
+
"loss": 11.666,
|
21032 |
+
"step": 2969
|
21033 |
+
},
|
21034 |
+
{
|
21035 |
+
"epoch": 0.8564939980534227,
|
21036 |
+
"grad_norm": 0.23607651889324188,
|
21037 |
+
"learning_rate": 4.060798459654672e-05,
|
21038 |
+
"loss": 11.6532,
|
21039 |
+
"step": 2970
|
21040 |
+
},
|
21041 |
+
{
|
21042 |
+
"epoch": 0.8567823798709492,
|
21043 |
+
"grad_norm": 0.16709445416927338,
|
21044 |
+
"learning_rate": 4.054802278086168e-05,
|
21045 |
+
"loss": 11.7033,
|
21046 |
+
"step": 2971
|
21047 |
+
},
|
21048 |
+
{
|
21049 |
+
"epoch": 0.8570707616884755,
|
21050 |
+
"grad_norm": 0.17614519596099854,
|
21051 |
+
"learning_rate": 4.0488094007986e-05,
|
21052 |
+
"loss": 11.6642,
|
21053 |
+
"step": 2972
|
21054 |
+
},
|
21055 |
+
{
|
21056 |
+
"epoch": 0.857359143506002,
|
21057 |
+
"grad_norm": 0.15115833282470703,
|
21058 |
+
"learning_rate": 4.042819831122761e-05,
|
21059 |
+
"loss": 11.7041,
|
21060 |
+
"step": 2973
|
21061 |
+
},
|
21062 |
+
{
|
21063 |
+
"epoch": 0.8576475253235284,
|
21064 |
+
"grad_norm": 0.1491180956363678,
|
21065 |
+
"learning_rate": 4.036833572387578e-05,
|
21066 |
+
"loss": 11.6556,
|
21067 |
+
"step": 2974
|
21068 |
+
},
|
21069 |
+
{
|
21070 |
+
"epoch": 0.8579359071410547,
|
21071 |
+
"grad_norm": 0.16634421050548553,
|
21072 |
+
"learning_rate": 4.030850627920166e-05,
|
21073 |
+
"loss": 11.6163,
|
21074 |
+
"step": 2975
|
21075 |
+
},
|
21076 |
+
{
|
21077 |
+
"epoch": 0.8582242889585812,
|
21078 |
+
"grad_norm": 0.12945958971977234,
|
21079 |
+
"learning_rate": 4.024871001045785e-05,
|
21080 |
+
"loss": 11.6816,
|
21081 |
+
"step": 2976
|
21082 |
+
},
|
21083 |
+
{
|
21084 |
+
"epoch": 0.8585126707761076,
|
21085 |
+
"grad_norm": 0.1281767189502716,
|
21086 |
+
"learning_rate": 4.0188946950878404e-05,
|
21087 |
+
"loss": 11.6574,
|
21088 |
+
"step": 2977
|
21089 |
+
},
|
21090 |
+
{
|
21091 |
+
"epoch": 0.8588010525936339,
|
21092 |
+
"grad_norm": 0.18251970410346985,
|
21093 |
+
"learning_rate": 4.012921713367916e-05,
|
21094 |
+
"loss": 11.6412,
|
21095 |
+
"step": 2978
|
21096 |
+
},
|
21097 |
+
{
|
21098 |
+
"epoch": 0.8590894344111604,
|
21099 |
+
"grad_norm": 0.10154789686203003,
|
21100 |
+
"learning_rate": 4.006952059205722e-05,
|
21101 |
+
"loss": 11.6922,
|
21102 |
+
"step": 2979
|
21103 |
+
},
|
21104 |
+
{
|
21105 |
+
"epoch": 0.8593778162286868,
|
21106 |
+
"grad_norm": 0.15670567750930786,
|
21107 |
+
"learning_rate": 4.000985735919143e-05,
|
21108 |
+
"loss": 11.6708,
|
21109 |
+
"step": 2980
|
21110 |
+
},
|
21111 |
+
{
|
21112 |
+
"epoch": 0.8596661980462131,
|
21113 |
+
"grad_norm": 0.13284581899642944,
|
21114 |
+
"learning_rate": 3.995022746824195e-05,
|
21115 |
+
"loss": 11.6794,
|
21116 |
+
"step": 2981
|
21117 |
+
},
|
21118 |
+
{
|
21119 |
+
"epoch": 0.8599545798637396,
|
21120 |
+
"grad_norm": 0.16967470943927765,
|
21121 |
+
"learning_rate": 3.989063095235049e-05,
|
21122 |
+
"loss": 11.6267,
|
21123 |
+
"step": 2982
|
21124 |
+
},
|
21125 |
+
{
|
21126 |
+
"epoch": 0.860242961681266,
|
21127 |
+
"grad_norm": 0.2072441130876541,
|
21128 |
+
"learning_rate": 3.983106784464021e-05,
|
21129 |
+
"loss": 11.6351,
|
21130 |
+
"step": 2983
|
21131 |
+
},
|
21132 |
+
{
|
21133 |
+
"epoch": 0.8605313434987925,
|
21134 |
+
"grad_norm": 0.13071005046367645,
|
21135 |
+
"learning_rate": 3.977153817821566e-05,
|
21136 |
+
"loss": 11.6827,
|
21137 |
+
"step": 2984
|
21138 |
+
},
|
21139 |
+
{
|
21140 |
+
"epoch": 0.8608197253163188,
|
21141 |
+
"grad_norm": 0.19142098724842072,
|
21142 |
+
"learning_rate": 3.971204198616284e-05,
|
21143 |
+
"loss": 11.612,
|
21144 |
+
"step": 2985
|
21145 |
+
},
|
21146 |
+
{
|
21147 |
+
"epoch": 0.8611081071338452,
|
21148 |
+
"grad_norm": 0.19070138037204742,
|
21149 |
+
"learning_rate": 3.965257930154912e-05,
|
21150 |
+
"loss": 11.6318,
|
21151 |
+
"step": 2986
|
21152 |
+
},
|
21153 |
+
{
|
21154 |
+
"epoch": 0.8613964889513717,
|
21155 |
+
"grad_norm": 0.12564696371555328,
|
21156 |
+
"learning_rate": 3.959315015742328e-05,
|
21157 |
+
"loss": 11.6543,
|
21158 |
+
"step": 2987
|
21159 |
+
},
|
21160 |
+
{
|
21161 |
+
"epoch": 0.861684870768898,
|
21162 |
+
"grad_norm": 0.14110830426216125,
|
21163 |
+
"learning_rate": 3.953375458681542e-05,
|
21164 |
+
"loss": 11.6678,
|
21165 |
+
"step": 2988
|
21166 |
+
},
|
21167 |
+
{
|
21168 |
+
"epoch": 0.8619732525864244,
|
21169 |
+
"grad_norm": 0.12227228283882141,
|
21170 |
+
"learning_rate": 3.9474392622736963e-05,
|
21171 |
+
"loss": 11.7028,
|
21172 |
+
"step": 2989
|
21173 |
+
},
|
21174 |
+
{
|
21175 |
+
"epoch": 0.8622616344039509,
|
21176 |
+
"grad_norm": 0.14122307300567627,
|
21177 |
+
"learning_rate": 3.941506429818083e-05,
|
21178 |
+
"loss": 11.6789,
|
21179 |
+
"step": 2990
|
21180 |
+
},
|
21181 |
+
{
|
21182 |
+
"epoch": 0.8625500162214772,
|
21183 |
+
"grad_norm": 0.17069974541664124,
|
21184 |
+
"learning_rate": 3.935576964612092e-05,
|
21185 |
+
"loss": 11.6435,
|
21186 |
+
"step": 2991
|
21187 |
+
},
|
21188 |
+
{
|
21189 |
+
"epoch": 0.8628383980390036,
|
21190 |
+
"grad_norm": 0.1299677938222885,
|
21191 |
+
"learning_rate": 3.929650869951278e-05,
|
21192 |
+
"loss": 11.6771,
|
21193 |
+
"step": 2992
|
21194 |
+
},
|
21195 |
+
{
|
21196 |
+
"epoch": 0.8631267798565301,
|
21197 |
+
"grad_norm": 0.13179805874824524,
|
21198 |
+
"learning_rate": 3.923728149129288e-05,
|
21199 |
+
"loss": 11.6816,
|
21200 |
+
"step": 2993
|
21201 |
+
},
|
21202 |
+
{
|
21203 |
+
"epoch": 0.8634151616740564,
|
21204 |
+
"grad_norm": 0.11812865734100342,
|
21205 |
+
"learning_rate": 3.9178088054379255e-05,
|
21206 |
+
"loss": 11.6985,
|
21207 |
+
"step": 2994
|
21208 |
+
},
|
21209 |
+
{
|
21210 |
+
"epoch": 0.8637035434915828,
|
21211 |
+
"grad_norm": 0.15860992670059204,
|
21212 |
+
"learning_rate": 3.911892842167089e-05,
|
21213 |
+
"loss": 11.7053,
|
21214 |
+
"step": 2995
|
21215 |
+
},
|
21216 |
+
{
|
21217 |
+
"epoch": 0.8639919253091093,
|
21218 |
+
"grad_norm": 0.21352651715278625,
|
21219 |
+
"learning_rate": 3.905980262604819e-05,
|
21220 |
+
"loss": 11.648,
|
21221 |
+
"step": 2996
|
21222 |
+
},
|
21223 |
+
{
|
21224 |
+
"epoch": 0.8642803071266356,
|
21225 |
+
"grad_norm": 0.1386694610118866,
|
21226 |
+
"learning_rate": 3.900071070037267e-05,
|
21227 |
+
"loss": 11.6742,
|
21228 |
+
"step": 2997
|
21229 |
+
},
|
21230 |
+
{
|
21231 |
+
"epoch": 0.8645686889441621,
|
21232 |
+
"grad_norm": 0.14939409494400024,
|
21233 |
+
"learning_rate": 3.894165267748702e-05,
|
21234 |
+
"loss": 11.6969,
|
21235 |
+
"step": 2998
|
21236 |
+
},
|
21237 |
+
{
|
21238 |
+
"epoch": 0.8648570707616885,
|
21239 |
+
"grad_norm": 0.12354502826929092,
|
21240 |
+
"learning_rate": 3.8882628590215074e-05,
|
21241 |
+
"loss": 11.6665,
|
21242 |
+
"step": 2999
|
21243 |
+
},
|
21244 |
+
{
|
21245 |
+
"epoch": 0.8651454525792149,
|
21246 |
+
"grad_norm": 0.19499656558036804,
|
21247 |
+
"learning_rate": 3.8823638471361844e-05,
|
21248 |
+
"loss": 11.666,
|
21249 |
+
"step": 3000
|
21250 |
+
},
|
21251 |
+
{
|
21252 |
+
"epoch": 0.8651454525792149,
|
21253 |
+
"eval_loss": 11.642943382263184,
|
21254 |
+
"eval_runtime": 40.4397,
|
21255 |
+
"eval_samples_per_second": 123.641,
|
21256 |
+
"eval_steps_per_second": 30.91,
|
21257 |
+
"step": 3000
|
21258 |
}
|
21259 |
],
|
21260 |
"logging_steps": 1,
|
|
|
21269 |
"early_stopping_threshold": 0.0
|
21270 |
},
|
21271 |
"attributes": {
|
21272 |
+
"early_stopping_patience_counter": 2
|
21273 |
}
|
21274 |
},
|
21275 |
"TrainerControl": {
|
|
|
21278 |
"should_evaluate": false,
|
21279 |
"should_log": false,
|
21280 |
"should_save": true,
|
21281 |
+
"should_training_stop": true
|
21282 |
},
|
21283 |
"attributes": {}
|
21284 |
}
|
21285 |
},
|
21286 |
+
"total_flos": 2451631876866048.0,
|
21287 |
"train_batch_size": 4,
|
21288 |
"trial_name": null,
|
21289 |
"trial_params": null
|