Training in progress, step 3300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 69527352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2eb2f4a1272ed7c45d0d57597219e288173a36ff1d96174f964cf75aa7e50f1
|
3 |
size 69527352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 139313554
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91afd86c3f0645431d67d9e9caef6058ba72bbca9804fa1b34ae225fb0fdcdfc
|
3 |
size 139313554
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89543781f745d82510d3991bd8bd26751b68ca2499fbac19015521a55810e601
|
3 |
size 14308
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5af34678d4362657736a6697e6bc5d13d1a967b12f171df00bcc4a7612a9b8a2
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2380,6 +2380,119 @@
|
|
2380 |
"eval_samples_per_second": 8.834,
|
2381 |
"eval_steps_per_second": 8.834,
|
2382 |
"step": 3150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2383 |
}
|
2384 |
],
|
2385 |
"logging_steps": 10,
|
@@ -2399,7 +2512,7 @@
|
|
2399 |
"attributes": {}
|
2400 |
}
|
2401 |
},
|
2402 |
-
"total_flos": 1.
|
2403 |
"train_batch_size": 16,
|
2404 |
"trial_name": null,
|
2405 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.7923147678375244,
|
3 |
+
"best_model_checkpoint": "./output/checkpoint-3300",
|
4 |
+
"epoch": 0.4102436598707111,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 3300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2380 |
"eval_samples_per_second": 8.834,
|
2381 |
"eval_steps_per_second": 8.834,
|
2382 |
"step": 3150
|
2383 |
+
},
|
2384 |
+
{
|
2385 |
+
"epoch": 0.3928393833913476,
|
2386 |
+
"grad_norm": 1.8093242645263672,
|
2387 |
+
"learning_rate": 3.8674868876822395e-05,
|
2388 |
+
"loss": 0.5608,
|
2389 |
+
"step": 3160
|
2390 |
+
},
|
2391 |
+
{
|
2392 |
+
"epoch": 0.3940825459970164,
|
2393 |
+
"grad_norm": 1.6383775472640991,
|
2394 |
+
"learning_rate": 3.83049049164295e-05,
|
2395 |
+
"loss": 0.5706,
|
2396 |
+
"step": 3170
|
2397 |
+
},
|
2398 |
+
{
|
2399 |
+
"epoch": 0.39532570860268523,
|
2400 |
+
"grad_norm": 1.7762494087219238,
|
2401 |
+
"learning_rate": 3.793593552162978e-05,
|
2402 |
+
"loss": 0.6272,
|
2403 |
+
"step": 3180
|
2404 |
+
},
|
2405 |
+
{
|
2406 |
+
"epoch": 0.39656887120835405,
|
2407 |
+
"grad_norm": 1.989702582359314,
|
2408 |
+
"learning_rate": 3.75679758593099e-05,
|
2409 |
+
"loss": 0.6268,
|
2410 |
+
"step": 3190
|
2411 |
+
},
|
2412 |
+
{
|
2413 |
+
"epoch": 0.39781203381402286,
|
2414 |
+
"grad_norm": 1.2394602298736572,
|
2415 |
+
"learning_rate": 3.720104105485039e-05,
|
2416 |
+
"loss": 0.5745,
|
2417 |
+
"step": 3200
|
2418 |
+
},
|
2419 |
+
{
|
2420 |
+
"epoch": 0.39905519641969167,
|
2421 |
+
"grad_norm": 1.6666808128356934,
|
2422 |
+
"learning_rate": 3.6835146191503885e-05,
|
2423 |
+
"loss": 0.6287,
|
2424 |
+
"step": 3210
|
2425 |
+
},
|
2426 |
+
{
|
2427 |
+
"epoch": 0.40029835902536054,
|
2428 |
+
"grad_norm": 0.926642119884491,
|
2429 |
+
"learning_rate": 3.647030630977508e-05,
|
2430 |
+
"loss": 0.6038,
|
2431 |
+
"step": 3220
|
2432 |
+
},
|
2433 |
+
{
|
2434 |
+
"epoch": 0.40154152163102935,
|
2435 |
+
"grad_norm": 1.3358100652694702,
|
2436 |
+
"learning_rate": 3.6106536406802524e-05,
|
2437 |
+
"loss": 0.5941,
|
2438 |
+
"step": 3230
|
2439 |
+
},
|
2440 |
+
{
|
2441 |
+
"epoch": 0.40278468423669817,
|
2442 |
+
"grad_norm": 1.339179277420044,
|
2443 |
+
"learning_rate": 3.5743851435742176e-05,
|
2444 |
+
"loss": 0.5888,
|
2445 |
+
"step": 3240
|
2446 |
+
},
|
2447 |
+
{
|
2448 |
+
"epoch": 0.404027846842367,
|
2449 |
+
"grad_norm": 1.4704395532608032,
|
2450 |
+
"learning_rate": 3.538226630515262e-05,
|
2451 |
+
"loss": 0.5113,
|
2452 |
+
"step": 3250
|
2453 |
+
},
|
2454 |
+
{
|
2455 |
+
"epoch": 0.4052710094480358,
|
2456 |
+
"grad_norm": 1.2576725482940674,
|
2457 |
+
"learning_rate": 3.502179587838238e-05,
|
2458 |
+
"loss": 0.5874,
|
2459 |
+
"step": 3260
|
2460 |
+
},
|
2461 |
+
{
|
2462 |
+
"epoch": 0.4065141720537046,
|
2463 |
+
"grad_norm": 1.1804664134979248,
|
2464 |
+
"learning_rate": 3.46624549729588e-05,
|
2465 |
+
"loss": 0.6054,
|
2466 |
+
"step": 3270
|
2467 |
+
},
|
2468 |
+
{
|
2469 |
+
"epoch": 0.40775733465937347,
|
2470 |
+
"grad_norm": 1.6472457647323608,
|
2471 |
+
"learning_rate": 3.430425835997908e-05,
|
2472 |
+
"loss": 0.6168,
|
2473 |
+
"step": 3280
|
2474 |
+
},
|
2475 |
+
{
|
2476 |
+
"epoch": 0.4090004972650423,
|
2477 |
+
"grad_norm": 1.3699522018432617,
|
2478 |
+
"learning_rate": 3.394722076350302e-05,
|
2479 |
+
"loss": 0.5227,
|
2480 |
+
"step": 3290
|
2481 |
+
},
|
2482 |
+
{
|
2483 |
+
"epoch": 0.4102436598707111,
|
2484 |
+
"grad_norm": 0.9297524690628052,
|
2485 |
+
"learning_rate": 3.359135685994781e-05,
|
2486 |
+
"loss": 0.5818,
|
2487 |
+
"step": 3300
|
2488 |
+
},
|
2489 |
+
{
|
2490 |
+
"epoch": 0.4102436598707111,
|
2491 |
+
"eval_loss": 0.7923147678375244,
|
2492 |
+
"eval_runtime": 53.7845,
|
2493 |
+
"eval_samples_per_second": 9.296,
|
2494 |
+
"eval_steps_per_second": 9.296,
|
2495 |
+
"step": 3300
|
2496 |
}
|
2497 |
],
|
2498 |
"logging_steps": 10,
|
|
|
2512 |
"attributes": {}
|
2513 |
}
|
2514 |
},
|
2515 |
+
"total_flos": 1.1468266790135808e+17,
|
2516 |
"train_batch_size": 16,
|
2517 |
"trial_name": null,
|
2518 |
"trial_params": null
|