{
  "best_metric": 0.8799543380737305,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.08983717012914093,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00044918585064570465,
      "grad_norm": 1.2916557788848877,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 1.7692,
      "step": 1
    },
    {
      "epoch": 0.00044918585064570465,
      "eval_loss": 1.9869955778121948,
      "eval_runtime": 205.6388,
      "eval_samples_per_second": 145.863,
      "eval_steps_per_second": 4.561,
      "step": 1
    },
    {
      "epoch": 0.0008983717012914093,
      "grad_norm": 1.607176661491394,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 1.8371,
      "step": 2
    },
    {
      "epoch": 0.001347557551937114,
      "grad_norm": 1.799497365951538,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 1.8926,
      "step": 3
    },
    {
      "epoch": 0.0017967434025828186,
      "grad_norm": 1.7198957204818726,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 1.9009,
      "step": 4
    },
    {
      "epoch": 0.0022459292532285235,
      "grad_norm": 1.7114357948303223,
      "learning_rate": 5.05e-05,
      "loss": 1.8579,
      "step": 5
    },
    {
      "epoch": 0.002695115103874228,
      "grad_norm": 1.4471302032470703,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 1.6855,
      "step": 6
    },
    {
      "epoch": 0.0031443009545199328,
      "grad_norm": 0.8383511304855347,
      "learning_rate": 7.07e-05,
      "loss": 1.4356,
      "step": 7
    },
    {
      "epoch": 0.003593486805165637,
      "grad_norm": 0.8857292532920837,
      "learning_rate": 8.080000000000001e-05,
      "loss": 1.3539,
      "step": 8
    },
    {
      "epoch": 0.004042672655811342,
      "grad_norm": 0.7878719568252563,
      "learning_rate": 9.09e-05,
      "loss": 1.2892,
      "step": 9
    },
    {
      "epoch": 0.004491858506457047,
      "grad_norm": 0.8838219046592712,
      "learning_rate": 0.000101,
      "loss": 1.2533,
      "step": 10
    },
    {
      "epoch": 0.004941044357102751,
      "grad_norm": 0.8204696774482727,
      "learning_rate": 0.00010046842105263158,
      "loss": 1.2432,
      "step": 11
    },
    {
      "epoch": 0.005390230207748456,
      "grad_norm": 0.6225486397743225,
      "learning_rate": 9.993684210526315e-05,
      "loss": 1.184,
      "step": 12
    },
    {
      "epoch": 0.00583941605839416,
      "grad_norm": 0.45047685503959656,
      "learning_rate": 9.940526315789473e-05,
      "loss": 1.1476,
      "step": 13
    },
    {
      "epoch": 0.0062886019090398655,
      "grad_norm": 0.43591365218162537,
      "learning_rate": 9.887368421052632e-05,
      "loss": 1.1241,
      "step": 14
    },
    {
      "epoch": 0.00673778775968557,
      "grad_norm": 0.39855462312698364,
      "learning_rate": 9.83421052631579e-05,
      "loss": 1.1202,
      "step": 15
    },
    {
      "epoch": 0.007186973610331274,
      "grad_norm": 0.32935649156570435,
      "learning_rate": 9.781052631578948e-05,
      "loss": 1.0963,
      "step": 16
    },
    {
      "epoch": 0.007636159460976979,
      "grad_norm": 0.3512268364429474,
      "learning_rate": 9.727894736842106e-05,
      "loss": 1.0864,
      "step": 17
    },
    {
      "epoch": 0.008085345311622683,
      "grad_norm": 0.36069127917289734,
      "learning_rate": 9.674736842105263e-05,
      "loss": 1.0866,
      "step": 18
    },
    {
      "epoch": 0.00853453116226839,
      "grad_norm": 0.3602248728275299,
      "learning_rate": 9.621578947368421e-05,
      "loss": 1.0199,
      "step": 19
    },
    {
      "epoch": 0.008983717012914094,
      "grad_norm": 0.3629809021949768,
      "learning_rate": 9.568421052631578e-05,
      "loss": 1.0797,
      "step": 20
    },
    {
      "epoch": 0.009432902863559798,
      "grad_norm": 0.3103269934654236,
      "learning_rate": 9.515263157894737e-05,
      "loss": 1.0546,
      "step": 21
    },
    {
      "epoch": 0.009882088714205503,
      "grad_norm": 0.30035093426704407,
      "learning_rate": 9.462105263157895e-05,
      "loss": 1.0643,
      "step": 22
    },
    {
      "epoch": 0.010331274564851207,
      "grad_norm": 0.3492095172405243,
      "learning_rate": 9.408947368421054e-05,
      "loss": 1.0258,
      "step": 23
    },
    {
      "epoch": 0.010780460415496912,
      "grad_norm": 0.4093751609325409,
      "learning_rate": 9.355789473684211e-05,
      "loss": 1.0746,
      "step": 24
    },
    {
      "epoch": 0.011229646266142616,
      "grad_norm": 0.5422231554985046,
      "learning_rate": 9.302631578947369e-05,
      "loss": 0.975,
      "step": 25
    },
    {
      "epoch": 0.01167883211678832,
      "grad_norm": 0.344453364610672,
      "learning_rate": 9.249473684210526e-05,
      "loss": 1.0594,
      "step": 26
    },
    {
      "epoch": 0.012128017967434027,
      "grad_norm": 0.3349956274032593,
      "learning_rate": 9.196315789473685e-05,
      "loss": 1.0283,
      "step": 27
    },
    {
      "epoch": 0.012577203818079731,
      "grad_norm": 0.30147019028663635,
      "learning_rate": 9.143157894736843e-05,
      "loss": 1.0116,
      "step": 28
    },
    {
      "epoch": 0.013026389668725435,
      "grad_norm": 0.25005072355270386,
      "learning_rate": 9.09e-05,
      "loss": 0.9857,
      "step": 29
    },
    {
      "epoch": 0.01347557551937114,
      "grad_norm": 0.262613981962204,
      "learning_rate": 9.036842105263158e-05,
      "loss": 1.0245,
      "step": 30
    },
    {
      "epoch": 0.013924761370016844,
      "grad_norm": 0.35343340039253235,
      "learning_rate": 8.983684210526316e-05,
      "loss": 0.967,
      "step": 31
    },
    {
      "epoch": 0.014373947220662549,
      "grad_norm": 0.24998489022254944,
      "learning_rate": 8.930526315789474e-05,
      "loss": 1.0017,
      "step": 32
    },
    {
      "epoch": 0.014823133071308253,
      "grad_norm": 0.24498069286346436,
      "learning_rate": 8.877368421052632e-05,
      "loss": 1.0075,
      "step": 33
    },
    {
      "epoch": 0.015272318921953958,
      "grad_norm": 0.21549324691295624,
      "learning_rate": 8.82421052631579e-05,
      "loss": 0.985,
      "step": 34
    },
    {
      "epoch": 0.015721504772599662,
      "grad_norm": 0.22830170392990112,
      "learning_rate": 8.771052631578948e-05,
      "loss": 0.9629,
      "step": 35
    },
    {
      "epoch": 0.016170690623245366,
      "grad_norm": 0.2677782475948334,
      "learning_rate": 8.717894736842105e-05,
      "loss": 1.014,
      "step": 36
    },
    {
      "epoch": 0.01661987647389107,
      "grad_norm": 0.30911344289779663,
      "learning_rate": 8.664736842105263e-05,
      "loss": 0.9741,
      "step": 37
    },
    {
      "epoch": 0.01706906232453678,
      "grad_norm": 0.26794004440307617,
      "learning_rate": 8.61157894736842e-05,
      "loss": 0.972,
      "step": 38
    },
    {
      "epoch": 0.017518248175182483,
      "grad_norm": 0.26757925748825073,
      "learning_rate": 8.55842105263158e-05,
      "loss": 1.0005,
      "step": 39
    },
    {
      "epoch": 0.017967434025828188,
      "grad_norm": 0.23241755366325378,
      "learning_rate": 8.505263157894737e-05,
      "loss": 0.9912,
      "step": 40
    },
    {
      "epoch": 0.018416619876473892,
      "grad_norm": 0.23415440320968628,
      "learning_rate": 8.452105263157896e-05,
      "loss": 0.96,
      "step": 41
    },
    {
      "epoch": 0.018865805727119597,
      "grad_norm": 0.2970597445964813,
      "learning_rate": 8.398947368421053e-05,
      "loss": 1.0017,
      "step": 42
    },
    {
      "epoch": 0.0193149915777653,
      "grad_norm": 0.3233031630516052,
      "learning_rate": 8.345789473684211e-05,
      "loss": 0.9603,
      "step": 43
    },
    {
      "epoch": 0.019764177428411005,
      "grad_norm": 0.25598403811454773,
      "learning_rate": 8.292631578947368e-05,
      "loss": 0.9232,
      "step": 44
    },
    {
      "epoch": 0.02021336327905671,
      "grad_norm": 0.28324705362319946,
      "learning_rate": 8.239473684210526e-05,
      "loss": 0.9762,
      "step": 45
    },
    {
      "epoch": 0.020662549129702414,
      "grad_norm": 0.2578172981739044,
      "learning_rate": 8.186315789473683e-05,
      "loss": 0.9924,
      "step": 46
    },
    {
      "epoch": 0.02111173498034812,
      "grad_norm": 0.23937109112739563,
      "learning_rate": 8.133157894736842e-05,
      "loss": 0.9744,
      "step": 47
    },
    {
      "epoch": 0.021560920830993823,
      "grad_norm": 0.2387576401233673,
      "learning_rate": 8.080000000000001e-05,
      "loss": 0.9448,
      "step": 48
    },
    {
      "epoch": 0.022010106681639528,
      "grad_norm": 0.3055512309074402,
      "learning_rate": 8.026842105263159e-05,
      "loss": 0.9624,
      "step": 49
    },
    {
      "epoch": 0.022459292532285232,
      "grad_norm": 0.3693564534187317,
      "learning_rate": 7.973684210526316e-05,
      "loss": 0.8829,
      "step": 50
    },
    {
      "epoch": 0.022459292532285232,
      "eval_loss": 0.9505019783973694,
      "eval_runtime": 206.986,
      "eval_samples_per_second": 144.913,
      "eval_steps_per_second": 4.532,
      "step": 50
    },
    {
      "epoch": 0.022908478382930936,
      "grad_norm": 0.27806970477104187,
      "learning_rate": 7.920526315789474e-05,
      "loss": 0.9497,
      "step": 51
    },
    {
      "epoch": 0.02335766423357664,
      "grad_norm": 0.25498324632644653,
      "learning_rate": 7.867368421052631e-05,
      "loss": 0.9515,
      "step": 52
    },
    {
      "epoch": 0.023806850084222345,
      "grad_norm": 0.2593478560447693,
      "learning_rate": 7.814210526315789e-05,
      "loss": 0.9594,
      "step": 53
    },
    {
      "epoch": 0.024256035934868053,
      "grad_norm": 0.2518002688884735,
      "learning_rate": 7.761052631578946e-05,
      "loss": 0.9459,
      "step": 54
    },
    {
      "epoch": 0.024705221785513758,
      "grad_norm": 0.26930755376815796,
      "learning_rate": 7.707894736842105e-05,
      "loss": 0.9266,
      "step": 55
    },
    {
      "epoch": 0.025154407636159462,
      "grad_norm": 0.3588949143886566,
      "learning_rate": 7.654736842105264e-05,
      "loss": 0.9222,
      "step": 56
    },
    {
      "epoch": 0.025603593486805167,
      "grad_norm": 0.2470785528421402,
      "learning_rate": 7.601578947368422e-05,
      "loss": 0.943,
      "step": 57
    },
    {
      "epoch": 0.02605277933745087,
      "grad_norm": 0.24494914710521698,
      "learning_rate": 7.548421052631579e-05,
      "loss": 0.9628,
      "step": 58
    },
    {
      "epoch": 0.026501965188096575,
      "grad_norm": 0.23059231042861938,
      "learning_rate": 7.495263157894737e-05,
      "loss": 0.9444,
      "step": 59
    },
    {
      "epoch": 0.02695115103874228,
      "grad_norm": 0.2332954704761505,
      "learning_rate": 7.442105263157894e-05,
      "loss": 0.9433,
      "step": 60
    },
    {
      "epoch": 0.027400336889387984,
      "grad_norm": 0.2612130045890808,
      "learning_rate": 7.388947368421053e-05,
      "loss": 0.9485,
      "step": 61
    },
    {
      "epoch": 0.02784952274003369,
      "grad_norm": 0.3084039092063904,
      "learning_rate": 7.335789473684211e-05,
      "loss": 0.9346,
      "step": 62
    },
    {
      "epoch": 0.028298708590679393,
      "grad_norm": 0.23960231244564056,
      "learning_rate": 7.282631578947368e-05,
      "loss": 0.955,
      "step": 63
    },
    {
      "epoch": 0.028747894441325098,
      "grad_norm": 0.2437230795621872,
      "learning_rate": 7.229473684210527e-05,
      "loss": 0.9372,
      "step": 64
    },
    {
      "epoch": 0.029197080291970802,
      "grad_norm": 0.2259586751461029,
      "learning_rate": 7.176315789473685e-05,
      "loss": 0.9379,
      "step": 65
    },
    {
      "epoch": 0.029646266142616506,
      "grad_norm": 0.2495633065700531,
      "learning_rate": 7.123157894736842e-05,
      "loss": 0.9357,
      "step": 66
    },
    {
      "epoch": 0.03009545199326221,
      "grad_norm": 0.27990928292274475,
      "learning_rate": 7.07e-05,
      "loss": 0.9611,
      "step": 67
    },
    {
      "epoch": 0.030544637843907915,
      "grad_norm": 0.28341051936149597,
      "learning_rate": 7.016842105263159e-05,
      "loss": 0.9106,
      "step": 68
    },
    {
      "epoch": 0.030993823694553623,
      "grad_norm": 0.26214614510536194,
      "learning_rate": 6.963684210526316e-05,
      "loss": 0.9311,
      "step": 69
    },
    {
      "epoch": 0.031443009545199324,
      "grad_norm": 0.30308669805526733,
      "learning_rate": 6.910526315789474e-05,
      "loss": 0.94,
      "step": 70
    },
    {
      "epoch": 0.03189219539584503,
      "grad_norm": 0.2610565423965454,
      "learning_rate": 6.857368421052631e-05,
      "loss": 0.9241,
      "step": 71
    },
    {
      "epoch": 0.03234138124649073,
      "grad_norm": 0.23850691318511963,
      "learning_rate": 6.80421052631579e-05,
      "loss": 0.9422,
      "step": 72
    },
    {
      "epoch": 0.03279056709713644,
      "grad_norm": 0.28376665711402893,
      "learning_rate": 6.751052631578948e-05,
      "loss": 0.9415,
      "step": 73
    },
    {
      "epoch": 0.03323975294778214,
      "grad_norm": 0.34418389201164246,
      "learning_rate": 6.697894736842105e-05,
      "loss": 0.9373,
      "step": 74
    },
    {
      "epoch": 0.033688938798427846,
      "grad_norm": 0.4302990734577179,
      "learning_rate": 6.644736842105264e-05,
      "loss": 0.8997,
      "step": 75
    },
    {
      "epoch": 0.03413812464907356,
      "grad_norm": 0.2855401933193207,
      "learning_rate": 6.591578947368422e-05,
      "loss": 0.945,
      "step": 76
    },
    {
      "epoch": 0.03458731049971926,
      "grad_norm": 0.3258453607559204,
      "learning_rate": 6.538421052631579e-05,
      "loss": 0.933,
      "step": 77
    },
    {
      "epoch": 0.035036496350364967,
      "grad_norm": 0.3375171422958374,
      "learning_rate": 6.485263157894737e-05,
      "loss": 0.9364,
      "step": 78
    },
    {
      "epoch": 0.03548568220101067,
      "grad_norm": 0.259318083524704,
      "learning_rate": 6.432105263157894e-05,
      "loss": 0.8854,
      "step": 79
    },
    {
      "epoch": 0.035934868051656375,
      "grad_norm": 0.28955981135368347,
      "learning_rate": 6.378947368421053e-05,
      "loss": 0.9347,
      "step": 80
    },
    {
      "epoch": 0.03638405390230208,
      "grad_norm": 0.4210306406021118,
      "learning_rate": 6.32578947368421e-05,
      "loss": 0.8895,
      "step": 81
    },
    {
      "epoch": 0.036833239752947784,
      "grad_norm": 0.22932107746601105,
      "learning_rate": 6.27263157894737e-05,
      "loss": 0.9466,
      "step": 82
    },
    {
      "epoch": 0.03728242560359349,
      "grad_norm": 0.22917746007442474,
      "learning_rate": 6.219473684210527e-05,
      "loss": 0.9374,
      "step": 83
    },
    {
      "epoch": 0.03773161145423919,
      "grad_norm": 0.25457313656806946,
      "learning_rate": 6.166315789473685e-05,
      "loss": 0.9387,
      "step": 84
    },
    {
      "epoch": 0.0381807973048849,
      "grad_norm": 0.2656180262565613,
      "learning_rate": 6.113157894736842e-05,
      "loss": 0.9287,
      "step": 85
    },
    {
      "epoch": 0.0386299831555306,
      "grad_norm": 0.26856109499931335,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 0.9036,
      "step": 86
    },
    {
      "epoch": 0.039079169006176306,
      "grad_norm": 0.30407968163490295,
      "learning_rate": 6.006842105263158e-05,
      "loss": 0.8876,
      "step": 87
    },
    {
      "epoch": 0.03952835485682201,
      "grad_norm": 0.2707090675830841,
      "learning_rate": 5.953684210526315e-05,
      "loss": 0.9105,
      "step": 88
    },
    {
      "epoch": 0.039977540707467715,
      "grad_norm": 0.26852765679359436,
      "learning_rate": 5.900526315789474e-05,
      "loss": 0.9667,
      "step": 89
    },
    {
      "epoch": 0.04042672655811342,
      "grad_norm": 0.23827865719795227,
      "learning_rate": 5.847368421052632e-05,
      "loss": 0.9466,
      "step": 90
    },
    {
      "epoch": 0.040875912408759124,
      "grad_norm": 0.25658079981803894,
      "learning_rate": 5.79421052631579e-05,
      "loss": 0.912,
      "step": 91
    },
    {
      "epoch": 0.04132509825940483,
      "grad_norm": 0.28569671511650085,
      "learning_rate": 5.7410526315789475e-05,
      "loss": 0.9243,
      "step": 92
    },
    {
      "epoch": 0.04177428411005053,
      "grad_norm": 0.3140634298324585,
      "learning_rate": 5.687894736842105e-05,
      "loss": 0.9092,
      "step": 93
    },
    {
      "epoch": 0.04222346996069624,
      "grad_norm": 0.26119399070739746,
      "learning_rate": 5.6347368421052625e-05,
      "loss": 0.8836,
      "step": 94
    },
    {
      "epoch": 0.04267265581134194,
      "grad_norm": 0.2906387448310852,
      "learning_rate": 5.5815789473684214e-05,
      "loss": 0.9251,
      "step": 95
    },
    {
      "epoch": 0.043121841661987646,
      "grad_norm": 0.2718961834907532,
      "learning_rate": 5.5284210526315796e-05,
      "loss": 0.9486,
      "step": 96
    },
    {
      "epoch": 0.04357102751263335,
      "grad_norm": 0.2514139711856842,
      "learning_rate": 5.475263157894737e-05,
      "loss": 0.906,
      "step": 97
    },
    {
      "epoch": 0.044020213363279055,
      "grad_norm": 0.26873457431793213,
      "learning_rate": 5.422105263157895e-05,
      "loss": 0.9222,
      "step": 98
    },
    {
      "epoch": 0.04446939921392476,
      "grad_norm": 0.3117465376853943,
      "learning_rate": 5.368947368421053e-05,
      "loss": 0.897,
      "step": 99
    },
    {
      "epoch": 0.044918585064570464,
      "grad_norm": 0.4226572513580322,
      "learning_rate": 5.3157894736842104e-05,
      "loss": 0.847,
      "step": 100
    },
    {
      "epoch": 0.044918585064570464,
      "eval_loss": 0.9066126942634583,
      "eval_runtime": 207.0827,
      "eval_samples_per_second": 144.846,
      "eval_steps_per_second": 4.53,
      "step": 100
    },
    {
      "epoch": 0.04536777091521617,
      "grad_norm": 0.25195518136024475,
      "learning_rate": 5.262631578947368e-05,
      "loss": 0.9138,
      "step": 101
    },
    {
      "epoch": 0.04581695676586187,
      "grad_norm": 0.2436404973268509,
      "learning_rate": 5.209473684210527e-05,
      "loss": 0.9067,
      "step": 102
    },
    {
      "epoch": 0.04626614261650758,
      "grad_norm": 0.27484673261642456,
      "learning_rate": 5.1563157894736844e-05,
      "loss": 0.9343,
      "step": 103
    },
    {
      "epoch": 0.04671532846715328,
      "grad_norm": 0.27864891290664673,
      "learning_rate": 5.1031578947368426e-05,
      "loss": 0.9083,
      "step": 104
    },
    {
      "epoch": 0.047164514317798986,
      "grad_norm": 0.28357595205307007,
      "learning_rate": 5.05e-05,
      "loss": 0.8889,
      "step": 105
    },
    {
      "epoch": 0.04761370016844469,
      "grad_norm": 0.3293287754058838,
      "learning_rate": 4.9968421052631576e-05,
      "loss": 0.8996,
      "step": 106
    },
    {
      "epoch": 0.0480628860190904,
      "grad_norm": 0.25651612877845764,
      "learning_rate": 4.943684210526316e-05,
      "loss": 0.9268,
      "step": 107
    },
    {
      "epoch": 0.048512071869736106,
      "grad_norm": 0.26456665992736816,
      "learning_rate": 4.890526315789474e-05,
      "loss": 0.9306,
      "step": 108
    },
    {
      "epoch": 0.04896125772038181,
      "grad_norm": 0.24413970112800598,
      "learning_rate": 4.8373684210526316e-05,
      "loss": 0.9318,
      "step": 109
    },
    {
      "epoch": 0.049410443571027515,
      "grad_norm": 0.24325355887413025,
      "learning_rate": 4.784210526315789e-05,
      "loss": 0.9073,
      "step": 110
    },
    {
      "epoch": 0.04985962942167322,
      "grad_norm": 0.2712045907974243,
      "learning_rate": 4.731052631578947e-05,
      "loss": 0.8931,
      "step": 111
    },
    {
      "epoch": 0.050308815272318924,
      "grad_norm": 0.3078647255897522,
      "learning_rate": 4.6778947368421055e-05,
      "loss": 0.8614,
      "step": 112
    },
    {
      "epoch": 0.05075800112296463,
      "grad_norm": 0.2627927362918854,
      "learning_rate": 4.624736842105263e-05,
      "loss": 0.8967,
      "step": 113
    },
    {
      "epoch": 0.05120718697361033,
      "grad_norm": 0.25500932335853577,
      "learning_rate": 4.571578947368421e-05,
      "loss": 0.9088,
      "step": 114
    },
    {
      "epoch": 0.05165637282425604,
      "grad_norm": 0.275991827249527,
      "learning_rate": 4.518421052631579e-05,
      "loss": 0.8913,
      "step": 115
    },
    {
      "epoch": 0.05210555867490174,
      "grad_norm": 0.2581343352794647,
      "learning_rate": 4.465263157894737e-05,
      "loss": 0.8865,
      "step": 116
    },
    {
      "epoch": 0.052554744525547446,
      "grad_norm": 0.2798708975315094,
      "learning_rate": 4.412105263157895e-05,
      "loss": 0.9146,
      "step": 117
    },
    {
      "epoch": 0.05300393037619315,
      "grad_norm": 0.3520914316177368,
      "learning_rate": 4.358947368421053e-05,
      "loss": 0.9002,
      "step": 118
    },
    {
      "epoch": 0.053453116226838855,
      "grad_norm": 0.3063805401325226,
      "learning_rate": 4.30578947368421e-05,
      "loss": 0.8889,
      "step": 119
    },
    {
      "epoch": 0.05390230207748456,
      "grad_norm": 0.268318772315979,
      "learning_rate": 4.2526315789473685e-05,
      "loss": 0.9306,
      "step": 120
    },
    {
      "epoch": 0.054351487928130264,
      "grad_norm": 0.2569490075111389,
      "learning_rate": 4.199473684210527e-05,
      "loss": 0.9124,
      "step": 121
    },
    {
      "epoch": 0.05480067377877597,
      "grad_norm": 0.24538756906986237,
      "learning_rate": 4.146315789473684e-05,
      "loss": 0.911,
      "step": 122
    },
    {
      "epoch": 0.05524985962942167,
      "grad_norm": 0.2484789937734604,
      "learning_rate": 4.093157894736842e-05,
      "loss": 0.9066,
      "step": 123
    },
    {
      "epoch": 0.05569904548006738,
      "grad_norm": 0.2707575261592865,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 0.8681,
      "step": 124
    },
    {
      "epoch": 0.05614823133071308,
      "grad_norm": 0.35909488797187805,
      "learning_rate": 3.986842105263158e-05,
      "loss": 0.8218,
      "step": 125
    },
    {
      "epoch": 0.056597417181358786,
      "grad_norm": 0.23776692152023315,
      "learning_rate": 3.933684210526316e-05,
      "loss": 0.9211,
      "step": 126
    },
    {
      "epoch": 0.05704660303200449,
      "grad_norm": 0.25755029916763306,
      "learning_rate": 3.880526315789473e-05,
      "loss": 0.8879,
      "step": 127
    },
    {
      "epoch": 0.057495788882650195,
      "grad_norm": 0.2494814097881317,
      "learning_rate": 3.827368421052632e-05,
      "loss": 0.8906,
      "step": 128
    },
    {
      "epoch": 0.0579449747332959,
      "grad_norm": 0.25940045714378357,
      "learning_rate": 3.7742105263157896e-05,
      "loss": 0.8868,
      "step": 129
    },
    {
      "epoch": 0.058394160583941604,
      "grad_norm": 0.2853882908821106,
      "learning_rate": 3.721052631578947e-05,
      "loss": 0.8775,
      "step": 130
    },
    {
      "epoch": 0.05884334643458731,
      "grad_norm": 0.32979917526245117,
      "learning_rate": 3.6678947368421054e-05,
      "loss": 0.8522,
      "step": 131
    },
    {
      "epoch": 0.05929253228523301,
      "grad_norm": 0.255938321352005,
      "learning_rate": 3.6147368421052636e-05,
      "loss": 0.9059,
      "step": 132
    },
    {
      "epoch": 0.05974171813587872,
      "grad_norm": 0.2526702284812927,
      "learning_rate": 3.561578947368421e-05,
      "loss": 0.9105,
      "step": 133
    },
    {
      "epoch": 0.06019090398652442,
      "grad_norm": 0.2506902813911438,
      "learning_rate": 3.508421052631579e-05,
      "loss": 0.8951,
      "step": 134
    },
    {
      "epoch": 0.060640089837170126,
      "grad_norm": 0.2672176957130432,
      "learning_rate": 3.455263157894737e-05,
      "loss": 0.8967,
      "step": 135
    },
    {
      "epoch": 0.06108927568781583,
      "grad_norm": 0.2774716913700104,
      "learning_rate": 3.402105263157895e-05,
      "loss": 0.8891,
      "step": 136
    },
    {
      "epoch": 0.06153846153846154,
      "grad_norm": 0.3068337142467499,
      "learning_rate": 3.3489473684210526e-05,
      "loss": 0.8719,
      "step": 137
    },
    {
      "epoch": 0.061987647389107246,
      "grad_norm": 0.2592477798461914,
      "learning_rate": 3.295789473684211e-05,
      "loss": 0.9152,
      "step": 138
    },
    {
      "epoch": 0.06243683323975295,
      "grad_norm": 0.24649055302143097,
      "learning_rate": 3.242631578947368e-05,
      "loss": 0.8869,
      "step": 139
    },
    {
      "epoch": 0.06288601909039865,
      "grad_norm": 0.24601422250270844,
      "learning_rate": 3.1894736842105265e-05,
      "loss": 0.9147,
      "step": 140
    },
    {
      "epoch": 0.06333520494104436,
      "grad_norm": 0.2531537115573883,
      "learning_rate": 3.136315789473685e-05,
      "loss": 0.9067,
      "step": 141
    },
    {
      "epoch": 0.06378439079169006,
      "grad_norm": 0.27577677369117737,
      "learning_rate": 3.083157894736842e-05,
      "loss": 0.8926,
      "step": 142
    },
    {
      "epoch": 0.06423357664233577,
      "grad_norm": 0.3049788475036621,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 0.8536,
      "step": 143
    },
    {
      "epoch": 0.06468276249298147,
      "grad_norm": 0.26886892318725586,
      "learning_rate": 2.9768421052631577e-05,
      "loss": 0.8746,
      "step": 144
    },
    {
      "epoch": 0.06513194834362718,
      "grad_norm": 0.2674404978752136,
      "learning_rate": 2.923684210526316e-05,
      "loss": 0.8991,
      "step": 145
    },
    {
      "epoch": 0.06558113419427287,
      "grad_norm": 0.2553546726703644,
      "learning_rate": 2.8705263157894737e-05,
      "loss": 0.8974,
      "step": 146
    },
    {
      "epoch": 0.06603032004491859,
      "grad_norm": 0.2580017149448395,
      "learning_rate": 2.8173684210526313e-05,
      "loss": 0.8854,
      "step": 147
    },
    {
      "epoch": 0.06647950589556428,
      "grad_norm": 0.27426302433013916,
      "learning_rate": 2.7642105263157898e-05,
      "loss": 0.8885,
      "step": 148
    },
    {
      "epoch": 0.06692869174621,
      "grad_norm": 0.3027507960796356,
      "learning_rate": 2.7110526315789473e-05,
      "loss": 0.8973,
      "step": 149
    },
    {
      "epoch": 0.06737787759685569,
      "grad_norm": 0.36818036437034607,
      "learning_rate": 2.6578947368421052e-05,
      "loss": 0.8283,
      "step": 150
    },
    {
      "epoch": 0.06737787759685569,
      "eval_loss": 0.8881184458732605,
      "eval_runtime": 207.2757,
      "eval_samples_per_second": 144.711,
      "eval_steps_per_second": 4.525,
      "step": 150
    },
    {
      "epoch": 0.0678270634475014,
      "grad_norm": 0.245889350771904,
      "learning_rate": 2.6047368421052634e-05,
      "loss": 0.9066,
      "step": 151
    },
    {
      "epoch": 0.06827624929814712,
      "grad_norm": 0.24444212019443512,
      "learning_rate": 2.5515789473684213e-05,
      "loss": 0.9163,
      "step": 152
    },
    {
      "epoch": 0.06872543514879281,
      "grad_norm": 0.24982236325740814,
      "learning_rate": 2.4984210526315788e-05,
      "loss": 0.8824,
      "step": 153
    },
    {
      "epoch": 0.06917462099943852,
      "grad_norm": 0.2683698534965515,
      "learning_rate": 2.445263157894737e-05,
      "loss": 0.8958,
      "step": 154
    },
    {
      "epoch": 0.06962380685008422,
      "grad_norm": 0.27906811237335205,
      "learning_rate": 2.3921052631578946e-05,
      "loss": 0.8996,
      "step": 155
    },
    {
      "epoch": 0.07007299270072993,
      "grad_norm": 0.3476739823818207,
      "learning_rate": 2.3389473684210528e-05,
      "loss": 0.8366,
      "step": 156
    },
    {
      "epoch": 0.07052217855137563,
      "grad_norm": 0.26626768708229065,
      "learning_rate": 2.2857894736842106e-05,
      "loss": 0.9163,
      "step": 157
    },
    {
      "epoch": 0.07097136440202134,
      "grad_norm": 0.26524534821510315,
      "learning_rate": 2.2326315789473685e-05,
      "loss": 0.895,
      "step": 158
    },
    {
      "epoch": 0.07142055025266704,
      "grad_norm": 0.26299136877059937,
      "learning_rate": 2.1794736842105264e-05,
      "loss": 0.8973,
      "step": 159
    },
    {
      "epoch": 0.07186973610331275,
      "grad_norm": 0.2615616023540497,
      "learning_rate": 2.1263157894736842e-05,
      "loss": 0.901,
      "step": 160
    },
    {
      "epoch": 0.07231892195395845,
      "grad_norm": 0.2764776945114136,
      "learning_rate": 2.073157894736842e-05,
      "loss": 0.8718,
      "step": 161
    },
    {
      "epoch": 0.07276810780460416,
      "grad_norm": 0.30812153220176697,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 0.877,
      "step": 162
    },
    {
      "epoch": 0.07321729365524986,
      "grad_norm": 0.2510615289211273,
      "learning_rate": 1.966842105263158e-05,
      "loss": 0.8739,
      "step": 163
    },
    {
      "epoch": 0.07366647950589557,
      "grad_norm": 0.24000753462314606,
      "learning_rate": 1.913684210526316e-05,
      "loss": 0.913,
      "step": 164
    },
    {
      "epoch": 0.07411566535654127,
      "grad_norm": 0.24215757846832275,
      "learning_rate": 1.8605263157894736e-05,
      "loss": 0.9063,
      "step": 165
    },
    {
      "epoch": 0.07456485120718698,
      "grad_norm": 0.26448169350624084,
      "learning_rate": 1.8073684210526318e-05,
      "loss": 0.8805,
      "step": 166
    },
    {
      "epoch": 0.07501403705783267,
      "grad_norm": 0.2861745357513428,
      "learning_rate": 1.7542105263157897e-05,
      "loss": 0.9049,
      "step": 167
    },
    {
      "epoch": 0.07546322290847839,
      "grad_norm": 0.30888983607292175,
      "learning_rate": 1.7010526315789475e-05,
      "loss": 0.867,
      "step": 168
    },
    {
      "epoch": 0.07591240875912408,
      "grad_norm": 0.27784985303878784,
      "learning_rate": 1.6478947368421054e-05,
      "loss": 0.8749,
      "step": 169
    },
    {
      "epoch": 0.0763615946097698,
      "grad_norm": 0.2495727688074112,
      "learning_rate": 1.5947368421052633e-05,
      "loss": 0.8959,
      "step": 170
    },
    {
      "epoch": 0.07681078046041549,
      "grad_norm": 0.24338717758655548,
      "learning_rate": 1.541578947368421e-05,
      "loss": 0.8821,
      "step": 171
    },
    {
      "epoch": 0.0772599663110612,
      "grad_norm": 0.25039950013160706,
      "learning_rate": 1.4884210526315788e-05,
      "loss": 0.8733,
      "step": 172
    },
    {
      "epoch": 0.0777091521617069,
      "grad_norm": 0.27508071064949036,
      "learning_rate": 1.4352631578947369e-05,
      "loss": 0.9056,
      "step": 173
    },
    {
      "epoch": 0.07815833801235261,
      "grad_norm": 0.2863399386405945,
      "learning_rate": 1.3821052631578949e-05,
      "loss": 0.8748,
      "step": 174
    },
    {
      "epoch": 0.07860752386299831,
      "grad_norm": 0.3646789491176605,
      "learning_rate": 1.3289473684210526e-05,
      "loss": 0.8383,
      "step": 175
    },
    {
      "epoch": 0.07905670971364402,
      "grad_norm": 0.2623049020767212,
      "learning_rate": 1.2757894736842106e-05,
      "loss": 0.8969,
      "step": 176
    },
    {
      "epoch": 0.07950589556428972,
      "grad_norm": 0.26813623309135437,
      "learning_rate": 1.2226315789473685e-05,
      "loss": 0.899,
      "step": 177
    },
    {
      "epoch": 0.07995508141493543,
      "grad_norm": 0.2505576014518738,
      "learning_rate": 1.1694736842105264e-05,
      "loss": 0.8629,
      "step": 178
    },
    {
      "epoch": 0.08040426726558113,
      "grad_norm": 0.25836649537086487,
      "learning_rate": 1.1163157894736842e-05,
      "loss": 0.8929,
      "step": 179
    },
    {
      "epoch": 0.08085345311622684,
      "grad_norm": 0.27238643169403076,
      "learning_rate": 1.0631578947368421e-05,
      "loss": 0.8732,
      "step": 180
    },
    {
      "epoch": 0.08130263896687254,
      "grad_norm": 0.33109569549560547,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 0.8555,
      "step": 181
    },
    {
      "epoch": 0.08175182481751825,
      "grad_norm": 0.24706503748893738,
      "learning_rate": 9.56842105263158e-06,
      "loss": 0.8922,
      "step": 182
    },
    {
      "epoch": 0.08220101066816396,
      "grad_norm": 0.24443064630031586,
      "learning_rate": 9.036842105263159e-06,
      "loss": 0.9011,
      "step": 183
    },
    {
      "epoch": 0.08265019651880966,
      "grad_norm": 0.24768005311489105,
      "learning_rate": 8.505263157894738e-06,
      "loss": 0.91,
      "step": 184
    },
    {
      "epoch": 0.08309938236945537,
      "grad_norm": 0.2456647902727127,
      "learning_rate": 7.973684210526316e-06,
      "loss": 0.8728,
      "step": 185
    },
    {
      "epoch": 0.08354856822010107,
      "grad_norm": 0.2784600555896759,
      "learning_rate": 7.442105263157894e-06,
      "loss": 0.8891,
      "step": 186
    },
    {
      "epoch": 0.08399775407074678,
      "grad_norm": 0.3673495948314667,
      "learning_rate": 6.9105263157894745e-06,
      "loss": 0.8639,
      "step": 187
    },
    {
      "epoch": 0.08444693992139247,
      "grad_norm": 0.253378301858902,
      "learning_rate": 6.378947368421053e-06,
      "loss": 0.8923,
      "step": 188
    },
    {
      "epoch": 0.08489612577203819,
      "grad_norm": 0.23032832145690918,
      "learning_rate": 5.847368421052632e-06,
      "loss": 0.9097,
      "step": 189
    },
    {
      "epoch": 0.08534531162268388,
      "grad_norm": 0.24500040709972382,
      "learning_rate": 5.315789473684211e-06,
      "loss": 0.9037,
      "step": 190
    },
    {
      "epoch": 0.0857944974733296,
      "grad_norm": 0.2614372670650482,
      "learning_rate": 4.78421052631579e-06,
      "loss": 0.9011,
      "step": 191
    },
    {
      "epoch": 0.08624368332397529,
      "grad_norm": 0.2819826900959015,
      "learning_rate": 4.252631578947369e-06,
      "loss": 0.8751,
      "step": 192
    },
    {
      "epoch": 0.086692869174621,
      "grad_norm": 0.29293060302734375,
      "learning_rate": 3.721052631578947e-06,
      "loss": 0.8822,
      "step": 193
    },
    {
      "epoch": 0.0871420550252667,
      "grad_norm": 0.2739028036594391,
      "learning_rate": 3.1894736842105266e-06,
      "loss": 0.8568,
      "step": 194
    },
    {
      "epoch": 0.08759124087591241,
      "grad_norm": 0.23401287198066711,
      "learning_rate": 2.6578947368421053e-06,
      "loss": 0.8958,
      "step": 195
    },
    {
      "epoch": 0.08804042672655811,
      "grad_norm": 0.23881946504116058,
      "learning_rate": 2.1263157894736844e-06,
      "loss": 0.8922,
      "step": 196
    },
    {
      "epoch": 0.08848961257720382,
      "grad_norm": 0.2490690052509308,
      "learning_rate": 1.5947368421052633e-06,
      "loss": 0.8816,
      "step": 197
    },
    {
      "epoch": 0.08893879842784952,
      "grad_norm": 0.26390817761421204,
      "learning_rate": 1.0631578947368422e-06,
      "loss": 0.8778,
      "step": 198
    },
    {
      "epoch": 0.08938798427849523,
      "grad_norm": 0.27074772119522095,
      "learning_rate": 5.315789473684211e-07,
      "loss": 0.8248,
      "step": 199
    },
    {
      "epoch": 0.08983717012914093,
      "grad_norm": 0.3729373812675476,
      "learning_rate": 0.0,
      "loss": 0.8088,
      "step": 200
    },
    {
      "epoch": 0.08983717012914093,
      "eval_loss": 0.8799543380737305,
      "eval_runtime": 207.3734,
      "eval_samples_per_second": 144.642,
      "eval_steps_per_second": 4.523,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1532281667584e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}