Training in progress, step 270, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 125048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75a3e84096039afb527d22d691d180e109ca9921f708bdaa27632df4487a4260
|
3 |
size 125048
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 162868
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc194bba403caad9cbc17f9f6c4159d35cde33e5cba286cd96d11edced40608d
|
3 |
size 162868
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3991cb24901cdd4ded826a1eb99233632b9b31143f5465b97735bc74e1caa25
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:765f5571460aced30b253ddf135511867127c526d96f703a3f7058177ad62b46
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 11.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2251,6 +2251,92 @@
|
|
2251 |
"eval_samples_per_second": 52.852,
|
2252 |
"eval_steps_per_second": 26.429,
|
2253 |
"step": 260
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2254 |
}
|
2255 |
],
|
2256 |
"logging_steps": 1,
|
@@ -2279,7 +2365,7 @@
|
|
2279 |
"attributes": {}
|
2280 |
}
|
2281 |
},
|
2282 |
-
"total_flos":
|
2283 |
"train_batch_size": 2,
|
2284 |
"trial_name": null,
|
2285 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 11.01980209350586,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-270",
|
4 |
+
"epoch": 0.012203114053919686,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 270,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2251 |
"eval_samples_per_second": 52.852,
|
2252 |
"eval_steps_per_second": 26.429,
|
2253 |
"step": 260
|
2254 |
+
},
|
2255 |
+
{
|
2256 |
+
"epoch": 0.011796343585455695,
|
2257 |
+
"grad_norm": 0.50531005859375,
|
2258 |
+
"learning_rate": 9.615410055896015e-05,
|
2259 |
+
"loss": 44.0094,
|
2260 |
+
"step": 261
|
2261 |
+
},
|
2262 |
+
{
|
2263 |
+
"epoch": 0.011841540304173916,
|
2264 |
+
"grad_norm": 0.6205224990844727,
|
2265 |
+
"learning_rate": 9.551351696494854e-05,
|
2266 |
+
"loss": 44.1,
|
2267 |
+
"step": 262
|
2268 |
+
},
|
2269 |
+
{
|
2270 |
+
"epoch": 0.011886737022892137,
|
2271 |
+
"grad_norm": 0.5274375081062317,
|
2272 |
+
"learning_rate": 9.48731177926821e-05,
|
2273 |
+
"loss": 44.1223,
|
2274 |
+
"step": 263
|
2275 |
+
},
|
2276 |
+
{
|
2277 |
+
"epoch": 0.011931933741610359,
|
2278 |
+
"grad_norm": 0.5149595141410828,
|
2279 |
+
"learning_rate": 9.423292936646257e-05,
|
2280 |
+
"loss": 44.1192,
|
2281 |
+
"step": 264
|
2282 |
+
},
|
2283 |
+
{
|
2284 |
+
"epoch": 0.01197713046032858,
|
2285 |
+
"grad_norm": 0.5359209179878235,
|
2286 |
+
"learning_rate": 9.359297800192872e-05,
|
2287 |
+
"loss": 44.1155,
|
2288 |
+
"step": 265
|
2289 |
+
},
|
2290 |
+
{
|
2291 |
+
"epoch": 0.01197713046032858,
|
2292 |
+
"eval_loss": 11.019892692565918,
|
2293 |
+
"eval_runtime": 176.1866,
|
2294 |
+
"eval_samples_per_second": 52.881,
|
2295 |
+
"eval_steps_per_second": 26.444,
|
2296 |
+
"step": 265
|
2297 |
+
},
|
2298 |
+
{
|
2299 |
+
"epoch": 0.012022327179046801,
|
2300 |
+
"grad_norm": 0.5752252340316772,
|
2301 |
+
"learning_rate": 9.29532900049746e-05,
|
2302 |
+
"loss": 44.0821,
|
2303 |
+
"step": 266
|
2304 |
+
},
|
2305 |
+
{
|
2306 |
+
"epoch": 0.012067523897765022,
|
2307 |
+
"grad_norm": 0.5125178098678589,
|
2308 |
+
"learning_rate": 9.231389167066837e-05,
|
2309 |
+
"loss": 44.061,
|
2310 |
+
"step": 267
|
2311 |
+
},
|
2312 |
+
{
|
2313 |
+
"epoch": 0.012112720616483243,
|
2314 |
+
"grad_norm": 0.5295204520225525,
|
2315 |
+
"learning_rate": 9.167480928217108e-05,
|
2316 |
+
"loss": 43.9889,
|
2317 |
+
"step": 268
|
2318 |
+
},
|
2319 |
+
{
|
2320 |
+
"epoch": 0.012157917335201465,
|
2321 |
+
"grad_norm": 0.40016570687294006,
|
2322 |
+
"learning_rate": 9.103606910965666e-05,
|
2323 |
+
"loss": 44.0684,
|
2324 |
+
"step": 269
|
2325 |
+
},
|
2326 |
+
{
|
2327 |
+
"epoch": 0.012203114053919686,
|
2328 |
+
"grad_norm": 0.42660149931907654,
|
2329 |
+
"learning_rate": 9.039769740923183e-05,
|
2330 |
+
"loss": 44.0547,
|
2331 |
+
"step": 270
|
2332 |
+
},
|
2333 |
+
{
|
2334 |
+
"epoch": 0.012203114053919686,
|
2335 |
+
"eval_loss": 11.01980209350586,
|
2336 |
+
"eval_runtime": 176.1599,
|
2337 |
+
"eval_samples_per_second": 52.889,
|
2338 |
+
"eval_steps_per_second": 26.448,
|
2339 |
+
"step": 270
|
2340 |
}
|
2341 |
],
|
2342 |
"logging_steps": 1,
|
|
|
2365 |
"attributes": {}
|
2366 |
}
|
2367 |
},
|
2368 |
+
"total_flos": 2836817510400.0,
|
2369 |
"train_batch_size": 2,
|
2370 |
"trial_name": null,
|
2371 |
"trial_params": null
|