Training in progress, step 130, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 125048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f266404b11f932cd639e83d974de617b4287bb271af343f15c1f9f42c44ba741
|
3 |
size 125048
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 162868
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98b00e17d0227c53fea69ecfa2d1996d9c919078652de27239147f7c82b52cef
|
3 |
size 162868
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8a0058f5e4eaf51e57150546d35540d117144afacffb0c679cba8d6cbe11058
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3511d75105f53c278279e3dade6f856082c8693b0424c0bf567bdcf23028dd2b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 11.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1047,6 +1047,92 @@
|
|
1047 |
"eval_samples_per_second": 52.838,
|
1048 |
"eval_steps_per_second": 26.422,
|
1049 |
"step": 120
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1050 |
}
|
1051 |
],
|
1052 |
"logging_steps": 1,
|
@@ -1075,7 +1161,7 @@
|
|
1075 |
"attributes": {}
|
1076 |
}
|
1077 |
},
|
1078 |
-
"total_flos":
|
1079 |
"train_batch_size": 2,
|
1080 |
"trial_name": null,
|
1081 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 11.027961730957031,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-130",
|
4 |
+
"epoch": 0.005875573433368738,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 130,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1047 |
"eval_samples_per_second": 52.838,
|
1048 |
"eval_steps_per_second": 26.422,
|
1049 |
"step": 120
|
1050 |
+
},
|
1051 |
+
{
|
1052 |
+
"epoch": 0.005468802964904748,
|
1053 |
+
"grad_norm": 0.43057698011398315,
|
1054 |
+
"learning_rate": 0.00017572742764761055,
|
1055 |
+
"loss": 44.1271,
|
1056 |
+
"step": 121
|
1057 |
+
},
|
1058 |
+
{
|
1059 |
+
"epoch": 0.005513999683622969,
|
1060 |
+
"grad_norm": 0.5054545402526855,
|
1061 |
+
"learning_rate": 0.00017530714660036112,
|
1062 |
+
"loss": 44.1574,
|
1063 |
+
"step": 122
|
1064 |
+
},
|
1065 |
+
{
|
1066 |
+
"epoch": 0.00555919640234119,
|
1067 |
+
"grad_norm": 0.47395941615104675,
|
1068 |
+
"learning_rate": 0.00017488376997127283,
|
1069 |
+
"loss": 44.0802,
|
1070 |
+
"step": 123
|
1071 |
+
},
|
1072 |
+
{
|
1073 |
+
"epoch": 0.005604393121059411,
|
1074 |
+
"grad_norm": 0.5438507795333862,
|
1075 |
+
"learning_rate": 0.0001744573151637007,
|
1076 |
+
"loss": 44.0974,
|
1077 |
+
"step": 124
|
1078 |
+
},
|
1079 |
+
{
|
1080 |
+
"epoch": 0.005649589839777632,
|
1081 |
+
"grad_norm": 0.5694723129272461,
|
1082 |
+
"learning_rate": 0.00017402779970753155,
|
1083 |
+
"loss": 44.1329,
|
1084 |
+
"step": 125
|
1085 |
+
},
|
1086 |
+
{
|
1087 |
+
"epoch": 0.005649589839777632,
|
1088 |
+
"eval_loss": 11.028435707092285,
|
1089 |
+
"eval_runtime": 176.0545,
|
1090 |
+
"eval_samples_per_second": 52.921,
|
1091 |
+
"eval_steps_per_second": 26.463,
|
1092 |
+
"step": 125
|
1093 |
+
},
|
1094 |
+
{
|
1095 |
+
"epoch": 0.005694786558495853,
|
1096 |
+
"grad_norm": 0.49188655614852905,
|
1097 |
+
"learning_rate": 0.0001735952412584635,
|
1098 |
+
"loss": 44.0859,
|
1099 |
+
"step": 126
|
1100 |
+
},
|
1101 |
+
{
|
1102 |
+
"epoch": 0.005739983277214074,
|
1103 |
+
"grad_norm": 0.5955361127853394,
|
1104 |
+
"learning_rate": 0.00017315965759728014,
|
1105 |
+
"loss": 44.0938,
|
1106 |
+
"step": 127
|
1107 |
+
},
|
1108 |
+
{
|
1109 |
+
"epoch": 0.0057851799959322955,
|
1110 |
+
"grad_norm": 0.4358704090118408,
|
1111 |
+
"learning_rate": 0.00017272106662911973,
|
1112 |
+
"loss": 44.1165,
|
1113 |
+
"step": 128
|
1114 |
+
},
|
1115 |
+
{
|
1116 |
+
"epoch": 0.005830376714650517,
|
1117 |
+
"grad_norm": 0.4302980899810791,
|
1118 |
+
"learning_rate": 0.00017227948638273916,
|
1119 |
+
"loss": 44.1088,
|
1120 |
+
"step": 129
|
1121 |
+
},
|
1122 |
+
{
|
1123 |
+
"epoch": 0.005875573433368738,
|
1124 |
+
"grad_norm": 0.5749801397323608,
|
1125 |
+
"learning_rate": 0.00017183493500977278,
|
1126 |
+
"loss": 44.1311,
|
1127 |
+
"step": 130
|
1128 |
+
},
|
1129 |
+
{
|
1130 |
+
"epoch": 0.005875573433368738,
|
1131 |
+
"eval_loss": 11.027961730957031,
|
1132 |
+
"eval_runtime": 176.2218,
|
1133 |
+
"eval_samples_per_second": 52.871,
|
1134 |
+
"eval_steps_per_second": 26.438,
|
1135 |
+
"step": 130
|
1136 |
}
|
1137 |
],
|
1138 |
"logging_steps": 1,
|
|
|
1161 |
"attributes": {}
|
1162 |
}
|
1163 |
},
|
1164 |
+
"total_flos": 1365875097600.0,
|
1165 |
"train_batch_size": 2,
|
1166 |
"trial_name": null,
|
1167 |
"trial_params": null
|