Training in progress, step 310, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 125048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8043d411e54c3df1ebca409696347b8e9a385dabbcffd552e858ae07b989cbf
|
3 |
size 125048
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 162868
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8fc14c40c058613fe56fd7b9d5397288001f7e3fc116344b43062f632ff8265
|
3 |
size 162868
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:314e02efe1b3db288f9e9c8ca583964838ea24b3581864ee7ae9f30730c12eb3
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c463750d3752994b4ac9a5cae2d97850942d858f0130dada5f5e81e74ee4daf
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 11.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2595,6 +2595,92 @@
|
|
2595 |
"eval_samples_per_second": 52.809,
|
2596 |
"eval_steps_per_second": 26.407,
|
2597 |
"step": 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2598 |
}
|
2599 |
],
|
2600 |
"logging_steps": 1,
|
@@ -2623,7 +2709,7 @@
|
|
2623 |
"attributes": {}
|
2624 |
}
|
2625 |
},
|
2626 |
-
"total_flos":
|
2627 |
"train_batch_size": 2,
|
2628 |
"trial_name": null,
|
2629 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 11.018967628479004,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-310",
|
4 |
+
"epoch": 0.014010982802648527,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 310,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2595 |
"eval_samples_per_second": 52.809,
|
2596 |
"eval_steps_per_second": 26.407,
|
2597 |
"step": 300
|
2598 |
+
},
|
2599 |
+
{
|
2600 |
+
"epoch": 0.013604212334184539,
|
2601 |
+
"grad_norm": 0.49278682470321655,
|
2602 |
+
"learning_rate": 7.093318872950665e-05,
|
2603 |
+
"loss": 44.0319,
|
2604 |
+
"step": 301
|
2605 |
+
},
|
2606 |
+
{
|
2607 |
+
"epoch": 0.01364940905290276,
|
2608 |
+
"grad_norm": 0.5009450316429138,
|
2609 |
+
"learning_rate": 7.032033094488095e-05,
|
2610 |
+
"loss": 44.0988,
|
2611 |
+
"step": 302
|
2612 |
+
},
|
2613 |
+
{
|
2614 |
+
"epoch": 0.013694605771620981,
|
2615 |
+
"grad_norm": 0.4270615577697754,
|
2616 |
+
"learning_rate": 6.97086931752028e-05,
|
2617 |
+
"loss": 44.1025,
|
2618 |
+
"step": 303
|
2619 |
+
},
|
2620 |
+
{
|
2621 |
+
"epoch": 0.013739802490339202,
|
2622 |
+
"grad_norm": 0.49744102358818054,
|
2623 |
+
"learning_rate": 6.909830056250527e-05,
|
2624 |
+
"loss": 44.0652,
|
2625 |
+
"step": 304
|
2626 |
+
},
|
2627 |
+
{
|
2628 |
+
"epoch": 0.013784999209057422,
|
2629 |
+
"grad_norm": 0.48600587248802185,
|
2630 |
+
"learning_rate": 6.848917819763793e-05,
|
2631 |
+
"loss": 44.1292,
|
2632 |
+
"step": 305
|
2633 |
+
},
|
2634 |
+
{
|
2635 |
+
"epoch": 0.013784999209057422,
|
2636 |
+
"eval_loss": 11.01909351348877,
|
2637 |
+
"eval_runtime": 176.1259,
|
2638 |
+
"eval_samples_per_second": 52.9,
|
2639 |
+
"eval_steps_per_second": 26.453,
|
2640 |
+
"step": 305
|
2641 |
+
},
|
2642 |
+
{
|
2643 |
+
"epoch": 0.013830195927775643,
|
2644 |
+
"grad_norm": 0.4116569459438324,
|
2645 |
+
"learning_rate": 6.788135111923545e-05,
|
2646 |
+
"loss": 44.0897,
|
2647 |
+
"step": 306
|
2648 |
+
},
|
2649 |
+
{
|
2650 |
+
"epoch": 0.013875392646493864,
|
2651 |
+
"grad_norm": 0.4364916682243347,
|
2652 |
+
"learning_rate": 6.72748443126883e-05,
|
2653 |
+
"loss": 44.1195,
|
2654 |
+
"step": 307
|
2655 |
+
},
|
2656 |
+
{
|
2657 |
+
"epoch": 0.013920589365212085,
|
2658 |
+
"grad_norm": 0.5589216351509094,
|
2659 |
+
"learning_rate": 6.666968270911584e-05,
|
2660 |
+
"loss": 44.0911,
|
2661 |
+
"step": 308
|
2662 |
+
},
|
2663 |
+
{
|
2664 |
+
"epoch": 0.013965786083930306,
|
2665 |
+
"grad_norm": 0.5414496064186096,
|
2666 |
+
"learning_rate": 6.606589118434126e-05,
|
2667 |
+
"loss": 44.1532,
|
2668 |
+
"step": 309
|
2669 |
+
},
|
2670 |
+
{
|
2671 |
+
"epoch": 0.014010982802648527,
|
2672 |
+
"grad_norm": 0.4488687515258789,
|
2673 |
+
"learning_rate": 6.546349455786926e-05,
|
2674 |
+
"loss": 44.0637,
|
2675 |
+
"step": 310
|
2676 |
+
},
|
2677 |
+
{
|
2678 |
+
"epoch": 0.014010982802648527,
|
2679 |
+
"eval_loss": 11.018967628479004,
|
2680 |
+
"eval_runtime": 176.4018,
|
2681 |
+
"eval_samples_per_second": 52.817,
|
2682 |
+
"eval_steps_per_second": 26.411,
|
2683 |
+
"step": 310
|
2684 |
}
|
2685 |
],
|
2686 |
"logging_steps": 1,
|
|
|
2709 |
"attributes": {}
|
2710 |
}
|
2711 |
},
|
2712 |
+
"total_flos": 3257086771200.0,
|
2713 |
"train_batch_size": 2,
|
2714 |
"trial_name": null,
|
2715 |
"trial_params": null
|