Training in progress, epoch 11, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b685eb47baafc7984a2354c64f3a633c89ac5489423119e83feaa1b146cbedb
|
3 |
+
size 2372346
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 990409330
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6600748b28844d6445286c2db32da87f8f4ae7df8cb2643db5bb76ae67233234
|
3 |
size 990409330
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4073b96953e05453b4b6e4a3030f1c740fa0e02670dbb4843214e79d4e7e84c5
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b44cd738bffad7be53bf491009475d442fa0e6452600c820ee5979a9a8ce3a05
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.5654487609863281,
|
3 |
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -953,13 +953,110 @@
|
|
953 |
"eval_steps_per_second": 1.037,
|
954 |
"eval_translation_length": 52485,
|
955 |
"step": 63230
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
956 |
}
|
957 |
],
|
958 |
"logging_steps": 500,
|
959 |
"max_steps": 126460,
|
960 |
"num_train_epochs": 20,
|
961 |
"save_steps": 500,
|
962 |
-
"total_flos": 1.
|
963 |
"trial_name": null,
|
964 |
"trial_params": null
|
965 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 1.5654487609863281,
|
3 |
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
|
4 |
+
"epoch": 11.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 69553,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
953 |
"eval_steps_per_second": 1.037,
|
954 |
"eval_translation_length": 52485,
|
955 |
"step": 63230
|
956 |
+
},
|
957 |
+
{
|
958 |
+
"epoch": 10.04,
|
959 |
+
"learning_rate": 2.498752940408342e-06,
|
960 |
+
"loss": 1.7938,
|
961 |
+
"step": 63500
|
962 |
+
},
|
963 |
+
{
|
964 |
+
"epoch": 10.12,
|
965 |
+
"learning_rate": 2.4675773582364977e-06,
|
966 |
+
"loss": 1.7688,
|
967 |
+
"step": 64000
|
968 |
+
},
|
969 |
+
{
|
970 |
+
"epoch": 10.2,
|
971 |
+
"learning_rate": 2.436406818231583e-06,
|
972 |
+
"loss": 1.7701,
|
973 |
+
"step": 64500
|
974 |
+
},
|
975 |
+
{
|
976 |
+
"epoch": 10.28,
|
977 |
+
"learning_rate": 2.4052461678414753e-06,
|
978 |
+
"loss": 1.7821,
|
979 |
+
"step": 65000
|
980 |
+
},
|
981 |
+
{
|
982 |
+
"epoch": 10.36,
|
983 |
+
"learning_rate": 2.37410025297608e-06,
|
984 |
+
"loss": 1.8251,
|
985 |
+
"step": 65500
|
986 |
+
},
|
987 |
+
{
|
988 |
+
"epoch": 10.44,
|
989 |
+
"learning_rate": 2.342973917253726e-06,
|
990 |
+
"loss": 1.7384,
|
991 |
+
"step": 66000
|
992 |
+
},
|
993 |
+
{
|
994 |
+
"epoch": 10.52,
|
995 |
+
"learning_rate": 2.3118720012479183e-06,
|
996 |
+
"loss": 1.8001,
|
997 |
+
"step": 66500
|
998 |
+
},
|
999 |
+
{
|
1000 |
+
"epoch": 10.6,
|
1001 |
+
"learning_rate": 2.280799341734556e-06,
|
1002 |
+
"loss": 1.8386,
|
1003 |
+
"step": 67000
|
1004 |
+
},
|
1005 |
+
{
|
1006 |
+
"epoch": 10.68,
|
1007 |
+
"learning_rate": 2.249760770939754e-06,
|
1008 |
+
"loss": 1.8098,
|
1009 |
+
"step": 67500
|
1010 |
+
},
|
1011 |
+
{
|
1012 |
+
"epoch": 10.75,
|
1013 |
+
"learning_rate": 2.218761115788362e-06,
|
1014 |
+
"loss": 1.8059,
|
1015 |
+
"step": 68000
|
1016 |
+
},
|
1017 |
+
{
|
1018 |
+
"epoch": 10.83,
|
1019 |
+
"learning_rate": 2.1878051971533093e-06,
|
1020 |
+
"loss": 1.757,
|
1021 |
+
"step": 68500
|
1022 |
+
},
|
1023 |
+
{
|
1024 |
+
"epoch": 10.91,
|
1025 |
+
"learning_rate": 2.156897829105898e-06,
|
1026 |
+
"loss": 1.8037,
|
1027 |
+
"step": 69000
|
1028 |
+
},
|
1029 |
+
{
|
1030 |
+
"epoch": 10.99,
|
1031 |
+
"learning_rate": 2.1260438181671446e-06,
|
1032 |
+
"loss": 1.7714,
|
1033 |
+
"step": 69500
|
1034 |
+
},
|
1035 |
+
{
|
1036 |
+
"epoch": 11.0,
|
1037 |
+
"eval_bleu": 1.0,
|
1038 |
+
"eval_brevity_penalty": 1.0,
|
1039 |
+
"eval_length_ratio": 1.0,
|
1040 |
+
"eval_loss": 1.5735211372375488,
|
1041 |
+
"eval_precisions": [
|
1042 |
+
1.0,
|
1043 |
+
1.0,
|
1044 |
+
1.0,
|
1045 |
+
1.0
|
1046 |
+
],
|
1047 |
+
"eval_reference_length": 52469,
|
1048 |
+
"eval_runtime": 678.026,
|
1049 |
+
"eval_samples_per_second": 4.146,
|
1050 |
+
"eval_steps_per_second": 1.037,
|
1051 |
+
"eval_translation_length": 52469,
|
1052 |
+
"step": 69553
|
1053 |
}
|
1054 |
],
|
1055 |
"logging_steps": 500,
|
1056 |
"max_steps": 126460,
|
1057 |
"num_train_epochs": 20,
|
1058 |
"save_steps": 500,
|
1059 |
+
"total_flos": 1.9050018020838605e+17,
|
1060 |
"trial_name": null,
|
1061 |
"trial_params": null
|
1062 |
}
|