Training in progress, step 547, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 191968
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c56401c5e3fc86419d48de1cc99bcf3059b6b2cc7c8ac93d54e779d71019847
|
3 |
size 191968
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 253144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86334b328c56e2d5a9e3f1ae6382f96c9d62d87b13ac1c2e5f6cf6724664fd79
|
3 |
size 253144
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a57af447cd6f38421f2a08b9c33ca21be0092487897eea84e8bb352a10ec5de0
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09e993d6d0decca6557382fb23f4a5c12589ee11e4e8e3addd876b8b9972e94e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 10.3164701461792,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-450",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -354,6 +354,69 @@
|
|
354 |
"eval_samples_per_second": 648.792,
|
355 |
"eval_steps_per_second": 162.726,
|
356 |
"step": 450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
}
|
358 |
],
|
359 |
"logging_steps": 10,
|
@@ -377,12 +440,12 @@
|
|
377 |
"should_evaluate": false,
|
378 |
"should_log": false,
|
379 |
"should_save": true,
|
380 |
-
"should_training_stop":
|
381 |
},
|
382 |
"attributes": {}
|
383 |
}
|
384 |
},
|
385 |
-
"total_flos":
|
386 |
"train_batch_size": 8,
|
387 |
"trial_name": null,
|
388 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 10.3164701461792,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-450",
|
4 |
+
"epoch": 1.0004572473708275,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 547,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
354 |
"eval_samples_per_second": 648.792,
|
355 |
"eval_steps_per_second": 162.726,
|
356 |
"step": 450
|
357 |
+
},
|
358 |
+
{
|
359 |
+
"epoch": 0.8413351623228167,
|
360 |
+
"grad_norm": 0.06713691353797913,
|
361 |
+
"learning_rate": 7.3721207795187876e-06,
|
362 |
+
"loss": 10.3323,
|
363 |
+
"step": 460
|
364 |
+
},
|
365 |
+
{
|
366 |
+
"epoch": 0.8596250571559213,
|
367 |
+
"grad_norm": 0.06836876273155212,
|
368 |
+
"learning_rate": 5.806543362721945e-06,
|
369 |
+
"loss": 10.3244,
|
370 |
+
"step": 470
|
371 |
+
},
|
372 |
+
{
|
373 |
+
"epoch": 0.877914951989026,
|
374 |
+
"grad_norm": 0.0769273117184639,
|
375 |
+
"learning_rate": 4.417488550807386e-06,
|
376 |
+
"loss": 10.3197,
|
377 |
+
"step": 480
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"epoch": 0.8962048468221308,
|
381 |
+
"grad_norm": 0.08537283539772034,
|
382 |
+
"learning_rate": 3.210504666816133e-06,
|
383 |
+
"loss": 10.3179,
|
384 |
+
"step": 490
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 0.9144947416552355,
|
388 |
+
"grad_norm": 0.20503534376621246,
|
389 |
+
"learning_rate": 2.1904127850760457e-06,
|
390 |
+
"loss": 10.314,
|
391 |
+
"step": 500
|
392 |
+
},
|
393 |
+
{
|
394 |
+
"epoch": 0.9327846364883402,
|
395 |
+
"grad_norm": 0.07405146211385727,
|
396 |
+
"learning_rate": 1.3612874743103189e-06,
|
397 |
+
"loss": 10.3337,
|
398 |
+
"step": 510
|
399 |
+
},
|
400 |
+
{
|
401 |
+
"epoch": 0.9510745313214449,
|
402 |
+
"grad_norm": 0.07247216254472733,
|
403 |
+
"learning_rate": 7.264405225248294e-07,
|
404 |
+
"loss": 10.3237,
|
405 |
+
"step": 520
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"epoch": 0.9693644261545497,
|
409 |
+
"grad_norm": 0.08077077567577362,
|
410 |
+
"learning_rate": 2.8840770868230894e-07,
|
411 |
+
"loss": 10.3192,
|
412 |
+
"step": 530
|
413 |
+
},
|
414 |
+
{
|
415 |
+
"epoch": 0.9876543209876543,
|
416 |
+
"grad_norm": 0.12774144113063812,
|
417 |
+
"learning_rate": 4.89386740013198e-08,
|
418 |
+
"loss": 10.3162,
|
419 |
+
"step": 540
|
420 |
}
|
421 |
],
|
422 |
"logging_steps": 10,
|
|
|
440 |
"should_evaluate": false,
|
441 |
"should_log": false,
|
442 |
"should_save": true,
|
443 |
+
"should_training_stop": true
|
444 |
},
|
445 |
"attributes": {}
|
446 |
}
|
447 |
},
|
448 |
+
"total_flos": 31629841219584.0,
|
449 |
"train_batch_size": 8,
|
450 |
"trial_name": null,
|
451 |
"trial_params": null
|