Training in progress, step 260, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 125048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c81d175e794ab238d63b2a692ce503c5c4dfef3174dfee2601e03d21ee7e7ff
|
3 |
size 125048
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 162868
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6588d9206907a6c00235f951ec1e76bb5c61306e975035df3959233feba0de44
|
3 |
size 162868
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1be1448f42a3a082b7043ab2c191269d82518d2f41873081925d363ddc8352ea
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fac612e1de34a13e54762dd7927b1179494a688e77b41a80ece98fe382c45710
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 11.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2165,6 +2165,92 @@
|
|
2165 |
"eval_samples_per_second": 52.839,
|
2166 |
"eval_steps_per_second": 26.422,
|
2167 |
"step": 250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2168 |
}
|
2169 |
],
|
2170 |
"logging_steps": 1,
|
@@ -2193,7 +2279,7 @@
|
|
2193 |
"attributes": {}
|
2194 |
}
|
2195 |
},
|
2196 |
-
"total_flos":
|
2197 |
"train_batch_size": 2,
|
2198 |
"trial_name": null,
|
2199 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 11.020062446594238,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-260",
|
4 |
+
"epoch": 0.011751146866737476,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 260,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2165 |
"eval_samples_per_second": 52.839,
|
2166 |
"eval_steps_per_second": 26.422,
|
2167 |
"step": 250
|
2168 |
+
},
|
2169 |
+
{
|
2170 |
+
"epoch": 0.011344376398273485,
|
2171 |
+
"grad_norm": 0.5418515801429749,
|
2172 |
+
"learning_rate": 0.00010256428432180956,
|
2173 |
+
"loss": 44.0602,
|
2174 |
+
"step": 251
|
2175 |
+
},
|
2176 |
+
{
|
2177 |
+
"epoch": 0.011389573116991706,
|
2178 |
+
"grad_norm": 0.45757991075515747,
|
2179 |
+
"learning_rate": 0.00010192330547876871,
|
2180 |
+
"loss": 44.0788,
|
2181 |
+
"step": 252
|
2182 |
+
},
|
2183 |
+
{
|
2184 |
+
"epoch": 0.011434769835709927,
|
2185 |
+
"grad_norm": 0.5210107564926147,
|
2186 |
+
"learning_rate": 0.00010128224757617274,
|
2187 |
+
"loss": 44.0517,
|
2188 |
+
"step": 253
|
2189 |
+
},
|
2190 |
+
{
|
2191 |
+
"epoch": 0.011479966554428149,
|
2192 |
+
"grad_norm": 0.39198753237724304,
|
2193 |
+
"learning_rate": 0.00010064113696540111,
|
2194 |
+
"loss": 44.0776,
|
2195 |
+
"step": 254
|
2196 |
+
},
|
2197 |
+
{
|
2198 |
+
"epoch": 0.01152516327314637,
|
2199 |
+
"grad_norm": 0.4305363893508911,
|
2200 |
+
"learning_rate": 0.0001,
|
2201 |
+
"loss": 44.1121,
|
2202 |
+
"step": 255
|
2203 |
+
},
|
2204 |
+
{
|
2205 |
+
"epoch": 0.01152516327314637,
|
2206 |
+
"eval_loss": 11.02021312713623,
|
2207 |
+
"eval_runtime": 176.1601,
|
2208 |
+
"eval_samples_per_second": 52.889,
|
2209 |
+
"eval_steps_per_second": 26.448,
|
2210 |
+
"step": 255
|
2211 |
+
},
|
2212 |
+
{
|
2213 |
+
"epoch": 0.011570359991864591,
|
2214 |
+
"grad_norm": 0.4909750521183014,
|
2215 |
+
"learning_rate": 9.93588630345989e-05,
|
2216 |
+
"loss": 44.0858,
|
2217 |
+
"step": 256
|
2218 |
+
},
|
2219 |
+
{
|
2220 |
+
"epoch": 0.011615556710582812,
|
2221 |
+
"grad_norm": 0.4016626477241516,
|
2222 |
+
"learning_rate": 9.871775242382727e-05,
|
2223 |
+
"loss": 44.0732,
|
2224 |
+
"step": 257
|
2225 |
+
},
|
2226 |
+
{
|
2227 |
+
"epoch": 0.011660753429301033,
|
2228 |
+
"grad_norm": 0.5827097296714783,
|
2229 |
+
"learning_rate": 9.80766945212313e-05,
|
2230 |
+
"loss": 44.0957,
|
2231 |
+
"step": 258
|
2232 |
+
},
|
2233 |
+
{
|
2234 |
+
"epoch": 0.011705950148019255,
|
2235 |
+
"grad_norm": 0.48728469014167786,
|
2236 |
+
"learning_rate": 9.743571567819046e-05,
|
2237 |
+
"loss": 44.0648,
|
2238 |
+
"step": 259
|
2239 |
+
},
|
2240 |
+
{
|
2241 |
+
"epoch": 0.011751146866737476,
|
2242 |
+
"grad_norm": 0.455342173576355,
|
2243 |
+
"learning_rate": 9.679484224283449e-05,
|
2244 |
+
"loss": 44.0327,
|
2245 |
+
"step": 260
|
2246 |
+
},
|
2247 |
+
{
|
2248 |
+
"epoch": 0.011751146866737476,
|
2249 |
+
"eval_loss": 11.020062446594238,
|
2250 |
+
"eval_runtime": 176.2853,
|
2251 |
+
"eval_samples_per_second": 52.852,
|
2252 |
+
"eval_steps_per_second": 26.429,
|
2253 |
+
"step": 260
|
2254 |
}
|
2255 |
],
|
2256 |
"logging_steps": 1,
|
|
|
2279 |
"attributes": {}
|
2280 |
}
|
2281 |
},
|
2282 |
+
"total_flos": 2731750195200.0,
|
2283 |
"train_batch_size": 2,
|
2284 |
"trial_name": null,
|
2285 |
"trial_params": null
|