Training in progress, step 3150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 69527352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:704458e22083d426be5e0b2430ec99e95658e2146eeda1abbadddcef1b66afa0
|
3 |
size 69527352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 139313554
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:516e6b0d7cfd706f5b04b458cc6f13af606fbcb05d80be45f02aa990d2fa7939
|
3 |
size 139313554
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7a7bdab08336c0f7233e606ce96075425fa9cf729719c53f2840e05d72ac534
|
3 |
size 14308
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2ad8a27e92c879b969b5845f60871e76a73be3547e482cc45027df5fe072f15
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2267,6 +2267,119 @@
|
|
2267 |
"eval_samples_per_second": 9.152,
|
2268 |
"eval_steps_per_second": 9.152,
|
2269 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2270 |
}
|
2271 |
],
|
2272 |
"logging_steps": 10,
|
@@ -2286,7 +2399,7 @@
|
|
2286 |
"attributes": {}
|
2287 |
}
|
2288 |
},
|
2289 |
-
"total_flos": 1.
|
2290 |
"train_batch_size": 16,
|
2291 |
"trial_name": null,
|
2292 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.801069974899292,
|
3 |
+
"best_model_checkpoint": "./output/checkpoint-3150",
|
4 |
+
"epoch": 0.39159622078567874,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 3150,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2267 |
"eval_samples_per_second": 9.152,
|
2268 |
"eval_steps_per_second": 9.152,
|
2269 |
"step": 3000
|
2270 |
+
},
|
2271 |
+
{
|
2272 |
+
"epoch": 0.37419194430631525,
|
2273 |
+
"grad_norm": 1.3308610916137695,
|
2274 |
+
"learning_rate": 4.433324295594166e-05,
|
2275 |
+
"loss": 0.6302,
|
2276 |
+
"step": 3010
|
2277 |
+
},
|
2278 |
+
{
|
2279 |
+
"epoch": 0.37543510691198406,
|
2280 |
+
"grad_norm": 1.3117073774337769,
|
2281 |
+
"learning_rate": 4.3950206840550585e-05,
|
2282 |
+
"loss": 0.5286,
|
2283 |
+
"step": 3020
|
2284 |
+
},
|
2285 |
+
{
|
2286 |
+
"epoch": 0.37667826951765293,
|
2287 |
+
"grad_norm": 1.3257042169570923,
|
2288 |
+
"learning_rate": 4.3567933234501746e-05,
|
2289 |
+
"loss": 0.6227,
|
2290 |
+
"step": 3030
|
2291 |
+
},
|
2292 |
+
{
|
2293 |
+
"epoch": 0.37792143212332174,
|
2294 |
+
"grad_norm": 1.614931583404541,
|
2295 |
+
"learning_rate": 4.318643785156579e-05,
|
2296 |
+
"loss": 0.5531,
|
2297 |
+
"step": 3040
|
2298 |
+
},
|
2299 |
+
{
|
2300 |
+
"epoch": 0.37916459472899056,
|
2301 |
+
"grad_norm": 1.4024949073791504,
|
2302 |
+
"learning_rate": 4.280573637352371e-05,
|
2303 |
+
"loss": 0.6107,
|
2304 |
+
"step": 3050
|
2305 |
+
},
|
2306 |
+
{
|
2307 |
+
"epoch": 0.38040775733465937,
|
2308 |
+
"grad_norm": 1.3442318439483643,
|
2309 |
+
"learning_rate": 4.242584444952216e-05,
|
2310 |
+
"loss": 0.619,
|
2311 |
+
"step": 3060
|
2312 |
+
},
|
2313 |
+
{
|
2314 |
+
"epoch": 0.3816509199403282,
|
2315 |
+
"grad_norm": 1.6472797393798828,
|
2316 |
+
"learning_rate": 4.204677769543019e-05,
|
2317 |
+
"loss": 0.6219,
|
2318 |
+
"step": 3070
|
2319 |
+
},
|
2320 |
+
{
|
2321 |
+
"epoch": 0.382894082545997,
|
2322 |
+
"grad_norm": 1.226382851600647,
|
2323 |
+
"learning_rate": 4.16685516931974e-05,
|
2324 |
+
"loss": 0.5669,
|
2325 |
+
"step": 3080
|
2326 |
+
},
|
2327 |
+
{
|
2328 |
+
"epoch": 0.38413724515166586,
|
2329 |
+
"grad_norm": 1.2685925960540771,
|
2330 |
+
"learning_rate": 4.1291181990213286e-05,
|
2331 |
+
"loss": 0.5875,
|
2332 |
+
"step": 3090
|
2333 |
+
},
|
2334 |
+
{
|
2335 |
+
"epoch": 0.3853804077573347,
|
2336 |
+
"grad_norm": 2.1690385341644287,
|
2337 |
+
"learning_rate": 4.0914684098668286e-05,
|
2338 |
+
"loss": 0.6366,
|
2339 |
+
"step": 3100
|
2340 |
+
},
|
2341 |
+
{
|
2342 |
+
"epoch": 0.3866235703630035,
|
2343 |
+
"grad_norm": 1.2350751161575317,
|
2344 |
+
"learning_rate": 4.053907349491608e-05,
|
2345 |
+
"loss": 0.6034,
|
2346 |
+
"step": 3110
|
2347 |
+
},
|
2348 |
+
{
|
2349 |
+
"epoch": 0.3878667329686723,
|
2350 |
+
"grad_norm": 1.4489704370498657,
|
2351 |
+
"learning_rate": 4.016436561883746e-05,
|
2352 |
+
"loss": 0.6346,
|
2353 |
+
"step": 3120
|
2354 |
+
},
|
2355 |
+
{
|
2356 |
+
"epoch": 0.3891098955743411,
|
2357 |
+
"grad_norm": 1.332980990409851,
|
2358 |
+
"learning_rate": 3.979057587320554e-05,
|
2359 |
+
"loss": 0.5858,
|
2360 |
+
"step": 3130
|
2361 |
+
},
|
2362 |
+
{
|
2363 |
+
"epoch": 0.3903530581800099,
|
2364 |
+
"grad_norm": 1.391718864440918,
|
2365 |
+
"learning_rate": 3.941771962305274e-05,
|
2366 |
+
"loss": 0.7218,
|
2367 |
+
"step": 3140
|
2368 |
+
},
|
2369 |
+
{
|
2370 |
+
"epoch": 0.39159622078567874,
|
2371 |
+
"grad_norm": 1.269722819328308,
|
2372 |
+
"learning_rate": 3.9045812195039125e-05,
|
2373 |
+
"loss": 0.5846,
|
2374 |
+
"step": 3150
|
2375 |
+
},
|
2376 |
+
{
|
2377 |
+
"epoch": 0.39159622078567874,
|
2378 |
+
"eval_loss": 0.801069974899292,
|
2379 |
+
"eval_runtime": 56.6006,
|
2380 |
+
"eval_samples_per_second": 8.834,
|
2381 |
+
"eval_steps_per_second": 8.834,
|
2382 |
+
"step": 3150
|
2383 |
}
|
2384 |
],
|
2385 |
"logging_steps": 10,
|
|
|
2399 |
"attributes": {}
|
2400 |
}
|
2401 |
},
|
2402 |
+
"total_flos": 1.0945600710137856e+17,
|
2403 |
"train_batch_size": 16,
|
2404 |
"trial_name": null,
|
2405 |
"trial_params": null
|