Training in progress, step 3150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 524363632
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89cc1745b4500ad6cf558198aa9b4e987065634d082f54bd44c3c89a26dd0906
|
3 |
size 524363632
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1049049442
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96cf2b77c6232af1bdf1652020d2d1e3b99db45a4d0a35dc7260f2f620431494
|
3 |
size 1049049442
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:469b796b4ae7827da357c6ad4d389b1a5f899bd0c5614fe11b27f1430a7f9bdc
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a2801bc6a2e1667d6f68fb7b82fe94994bd9e743692539b17302be0c7385f74
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2267,6 +2267,119 @@
|
|
2267 |
"eval_samples_per_second": 9.696,
|
2268 |
"eval_steps_per_second": 9.696,
|
2269 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2270 |
}
|
2271 |
],
|
2272 |
"logging_steps": 10,
|
@@ -2286,7 +2399,7 @@
|
|
2286 |
"attributes": {}
|
2287 |
}
|
2288 |
},
|
2289 |
-
"total_flos":
|
2290 |
"train_batch_size": 4,
|
2291 |
"trial_name": null,
|
2292 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.1985480785369873,
|
3 |
+
"best_model_checkpoint": "./output/checkpoint-3150",
|
4 |
+
"epoch": 0.14109742441209405,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 3150,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2267 |
"eval_samples_per_second": 9.696,
|
2268 |
"eval_steps_per_second": 9.696,
|
2269 |
"step": 3000
|
2270 |
+
},
|
2271 |
+
{
|
2272 |
+
"epoch": 0.13482642777155654,
|
2273 |
+
"grad_norm": 4.307104587554932,
|
2274 |
+
"learning_rate": 2.6599945773564997e-06,
|
2275 |
+
"loss": 1.1743,
|
2276 |
+
"step": 3010
|
2277 |
+
},
|
2278 |
+
{
|
2279 |
+
"epoch": 0.13527435610302352,
|
2280 |
+
"grad_norm": 4.9457221031188965,
|
2281 |
+
"learning_rate": 2.6370124104330357e-06,
|
2282 |
+
"loss": 1.1287,
|
2283 |
+
"step": 3020
|
2284 |
+
},
|
2285 |
+
{
|
2286 |
+
"epoch": 0.13572228443449047,
|
2287 |
+
"grad_norm": 3.17401385307312,
|
2288 |
+
"learning_rate": 2.614075994070105e-06,
|
2289 |
+
"loss": 1.1686,
|
2290 |
+
"step": 3030
|
2291 |
+
},
|
2292 |
+
{
|
2293 |
+
"epoch": 0.13617021276595745,
|
2294 |
+
"grad_norm": 6.098177433013916,
|
2295 |
+
"learning_rate": 2.591186271093948e-06,
|
2296 |
+
"loss": 1.1546,
|
2297 |
+
"step": 3040
|
2298 |
+
},
|
2299 |
+
{
|
2300 |
+
"epoch": 0.1366181410974244,
|
2301 |
+
"grad_norm": 4.12905216217041,
|
2302 |
+
"learning_rate": 2.568344182411423e-06,
|
2303 |
+
"loss": 1.0909,
|
2304 |
+
"step": 3050
|
2305 |
+
},
|
2306 |
+
{
|
2307 |
+
"epoch": 0.13706606942889138,
|
2308 |
+
"grad_norm": 4.946627616882324,
|
2309 |
+
"learning_rate": 2.5455506669713293e-06,
|
2310 |
+
"loss": 1.2223,
|
2311 |
+
"step": 3060
|
2312 |
+
},
|
2313 |
+
{
|
2314 |
+
"epoch": 0.13751399776035833,
|
2315 |
+
"grad_norm": 4.25789737701416,
|
2316 |
+
"learning_rate": 2.522806661725812e-06,
|
2317 |
+
"loss": 1.0383,
|
2318 |
+
"step": 3070
|
2319 |
+
},
|
2320 |
+
{
|
2321 |
+
"epoch": 0.1379619260918253,
|
2322 |
+
"grad_norm": 6.536715030670166,
|
2323 |
+
"learning_rate": 2.5001131015918444e-06,
|
2324 |
+
"loss": 0.9992,
|
2325 |
+
"step": 3080
|
2326 |
+
},
|
2327 |
+
{
|
2328 |
+
"epoch": 0.13840985442329226,
|
2329 |
+
"grad_norm": 5.861030578613281,
|
2330 |
+
"learning_rate": 2.4774709194127973e-06,
|
2331 |
+
"loss": 1.1678,
|
2332 |
+
"step": 3090
|
2333 |
+
},
|
2334 |
+
{
|
2335 |
+
"epoch": 0.13885778275475924,
|
2336 |
+
"grad_norm": 4.58046293258667,
|
2337 |
+
"learning_rate": 2.4548810459200973e-06,
|
2338 |
+
"loss": 1.2545,
|
2339 |
+
"step": 3100
|
2340 |
+
},
|
2341 |
+
{
|
2342 |
+
"epoch": 0.1393057110862262,
|
2343 |
+
"grad_norm": 6.048022270202637,
|
2344 |
+
"learning_rate": 2.4323444096949647e-06,
|
2345 |
+
"loss": 1.0531,
|
2346 |
+
"step": 3110
|
2347 |
+
},
|
2348 |
+
{
|
2349 |
+
"epoch": 0.13975363941769317,
|
2350 |
+
"grad_norm": 5.86400842666626,
|
2351 |
+
"learning_rate": 2.409861937130248e-06,
|
2352 |
+
"loss": 1.1093,
|
2353 |
+
"step": 3120
|
2354 |
+
},
|
2355 |
+
{
|
2356 |
+
"epoch": 0.14020156774916012,
|
2357 |
+
"grad_norm": 3.7916102409362793,
|
2358 |
+
"learning_rate": 2.3874345523923327e-06,
|
2359 |
+
"loss": 1.1048,
|
2360 |
+
"step": 3130
|
2361 |
+
},
|
2362 |
+
{
|
2363 |
+
"epoch": 0.1406494960806271,
|
2364 |
+
"grad_norm": 4.009166717529297,
|
2365 |
+
"learning_rate": 2.3650631773831644e-06,
|
2366 |
+
"loss": 1.0198,
|
2367 |
+
"step": 3140
|
2368 |
+
},
|
2369 |
+
{
|
2370 |
+
"epoch": 0.14109742441209405,
|
2371 |
+
"grad_norm": 4.695572853088379,
|
2372 |
+
"learning_rate": 2.3427487317023477e-06,
|
2373 |
+
"loss": 1.1909,
|
2374 |
+
"step": 3150
|
2375 |
+
},
|
2376 |
+
{
|
2377 |
+
"epoch": 0.14109742441209405,
|
2378 |
+
"eval_loss": 1.1985480785369873,
|
2379 |
+
"eval_runtime": 51.6619,
|
2380 |
+
"eval_samples_per_second": 9.678,
|
2381 |
+
"eval_steps_per_second": 9.678,
|
2382 |
+
"step": 3150
|
2383 |
}
|
2384 |
],
|
2385 |
"logging_steps": 10,
|
|
|
2399 |
"attributes": {}
|
2400 |
}
|
2401 |
},
|
2402 |
+
"total_flos": 4.04330569814016e+17,
|
2403 |
"train_batch_size": 4,
|
2404 |
"trial_name": null,
|
2405 |
"trial_params": null
|