|
{ |
|
"best_metric": 0.7956131605184447, |
|
"best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-1090", |
|
"epoch": 9.954337899543379, |
|
"eval_steps": 500, |
|
"global_step": 1090, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.768280267715454, |
|
"learning_rate": 0.004954128440366973, |
|
"loss": 1.2551, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.6224374771118164, |
|
"learning_rate": 0.004908256880733945, |
|
"loss": 1.1194, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.8902679085731506, |
|
"learning_rate": 0.004862385321100918, |
|
"loss": 0.9226, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.336371898651123, |
|
"learning_rate": 0.00481651376146789, |
|
"loss": 0.9343, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9396265745162964, |
|
"learning_rate": 0.0047706422018348625, |
|
"loss": 0.849, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.6278876066207886, |
|
"learning_rate": 0.004724770642201835, |
|
"loss": 0.8533, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.5454286336898804, |
|
"learning_rate": 0.004678899082568808, |
|
"loss": 0.8229, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.064020037651062, |
|
"learning_rate": 0.00463302752293578, |
|
"loss": 0.8622, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.5100964307785034, |
|
"learning_rate": 0.0045871559633027525, |
|
"loss": 0.8633, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.3673182725906372, |
|
"learning_rate": 0.004541284403669725, |
|
"loss": 0.9012, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7228315054835494, |
|
"eval_f1": 0.34621811450823214, |
|
"eval_loss": 0.7629963159561157, |
|
"eval_precision": 0.42629140706004787, |
|
"eval_recall": 0.32692877938779574, |
|
"eval_runtime": 6.2499, |
|
"eval_samples_per_second": 160.484, |
|
"eval_steps_per_second": 10.08, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.9423499703407288, |
|
"learning_rate": 0.004495412844036698, |
|
"loss": 0.8922, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.50795841217041, |
|
"learning_rate": 0.0044495412844036695, |
|
"loss": 0.8791, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.4502021074295044, |
|
"learning_rate": 0.004403669724770643, |
|
"loss": 0.8973, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.703600287437439, |
|
"learning_rate": 0.004357798165137615, |
|
"loss": 0.8246, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.7422580122947693, |
|
"learning_rate": 0.004311926605504587, |
|
"loss": 0.7913, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.146699070930481, |
|
"learning_rate": 0.0042660550458715595, |
|
"loss": 0.8028, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.6000311374664307, |
|
"learning_rate": 0.004220183486238533, |
|
"loss": 0.9162, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.8894189596176147, |
|
"learning_rate": 0.004174311926605505, |
|
"loss": 0.9022, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.9741529226303101, |
|
"learning_rate": 0.004128440366972477, |
|
"loss": 0.8879, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 2.898531198501587, |
|
"learning_rate": 0.00408256880733945, |
|
"loss": 0.7974, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.5416871309280396, |
|
"learning_rate": 0.004036697247706422, |
|
"loss": 0.7636, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7288135593220338, |
|
"eval_f1": 0.3788829143385214, |
|
"eval_loss": 0.7212122082710266, |
|
"eval_precision": 0.591156357572359, |
|
"eval_recall": 0.3630725573348524, |
|
"eval_runtime": 5.4131, |
|
"eval_samples_per_second": 185.292, |
|
"eval_steps_per_second": 11.638, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.9239124059677124, |
|
"learning_rate": 0.003990825688073394, |
|
"loss": 0.8466, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.9725480675697327, |
|
"learning_rate": 0.003944954128440367, |
|
"loss": 0.8851, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 1.1037930250167847, |
|
"learning_rate": 0.0038990825688073397, |
|
"loss": 0.7814, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.5679117441177368, |
|
"learning_rate": 0.0038532110091743124, |
|
"loss": 0.7781, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.7649789452552795, |
|
"learning_rate": 0.0038073394495412843, |
|
"loss": 0.8029, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 1.4153032302856445, |
|
"learning_rate": 0.003761467889908257, |
|
"loss": 0.7439, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 1.177064299583435, |
|
"learning_rate": 0.0037155963302752293, |
|
"loss": 0.8781, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.9871839284896851, |
|
"learning_rate": 0.003669724770642202, |
|
"loss": 0.7439, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.7878830432891846, |
|
"learning_rate": 0.0036238532110091743, |
|
"loss": 0.8001, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 1.0698702335357666, |
|
"learning_rate": 0.003577981651376147, |
|
"loss": 0.8568, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.9107087850570679, |
|
"learning_rate": 0.0035321100917431194, |
|
"loss": 0.7189, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7258225324027916, |
|
"eval_f1": 0.393460298491453, |
|
"eval_loss": 0.7621892690658569, |
|
"eval_precision": 0.4464810481413866, |
|
"eval_recall": 0.422982191132074, |
|
"eval_runtime": 5.2671, |
|
"eval_samples_per_second": 190.428, |
|
"eval_steps_per_second": 11.961, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 1.1653732061386108, |
|
"learning_rate": 0.003486238532110092, |
|
"loss": 0.7245, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 0.693824291229248, |
|
"learning_rate": 0.0034403669724770644, |
|
"loss": 0.7883, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.7583943009376526, |
|
"learning_rate": 0.003394495412844037, |
|
"loss": 0.7618, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 1.0829322338104248, |
|
"learning_rate": 0.003348623853211009, |
|
"loss": 0.7534, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 1.2232468128204346, |
|
"learning_rate": 0.0033027522935779817, |
|
"loss": 0.6827, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"grad_norm": 1.0095463991165161, |
|
"learning_rate": 0.003256880733944954, |
|
"loss": 0.7615, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 1.059535026550293, |
|
"learning_rate": 0.003211009174311927, |
|
"loss": 0.8094, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.9983842372894287, |
|
"learning_rate": 0.003165137614678899, |
|
"loss": 0.7649, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.6007803082466125, |
|
"learning_rate": 0.003119266055045872, |
|
"loss": 0.7825, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 1.1800183057785034, |
|
"learning_rate": 0.003073394495412844, |
|
"loss": 0.7296, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 1.049116611480713, |
|
"learning_rate": 0.003027522935779817, |
|
"loss": 0.6904, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7437686939182453, |
|
"eval_f1": 0.4115031629499951, |
|
"eval_loss": 0.7280949950218201, |
|
"eval_precision": 0.48879535404583313, |
|
"eval_recall": 0.4484100461852218, |
|
"eval_runtime": 5.2319, |
|
"eval_samples_per_second": 191.708, |
|
"eval_steps_per_second": 12.041, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.9003276824951172, |
|
"learning_rate": 0.002981651376146789, |
|
"loss": 0.6584, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 1.3586938381195068, |
|
"learning_rate": 0.002935779816513762, |
|
"loss": 0.7175, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 0.5924133062362671, |
|
"learning_rate": 0.0028899082568807338, |
|
"loss": 0.67, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 0.9187817573547363, |
|
"learning_rate": 0.0028440366972477065, |
|
"loss": 0.697, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 0.7464513778686523, |
|
"learning_rate": 0.002798165137614679, |
|
"loss": 0.6691, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 0.6532416343688965, |
|
"learning_rate": 0.0027522935779816515, |
|
"loss": 0.7217, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.7422506809234619, |
|
"learning_rate": 0.002706422018348624, |
|
"loss": 0.7619, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 0.8745353817939758, |
|
"learning_rate": 0.0026605504587155966, |
|
"loss": 0.7264, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 0.7916358113288879, |
|
"learning_rate": 0.002614678899082569, |
|
"loss": 0.7592, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 1.0833995342254639, |
|
"learning_rate": 0.0025688073394495416, |
|
"loss": 0.7263, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"grad_norm": 1.0176126956939697, |
|
"learning_rate": 0.0025229357798165135, |
|
"loss": 0.7658, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7397806580259222, |
|
"eval_f1": 0.3752685826577452, |
|
"eval_loss": 0.7215412259101868, |
|
"eval_precision": 0.4855492779862528, |
|
"eval_recall": 0.42520631993699903, |
|
"eval_runtime": 5.5429, |
|
"eval_samples_per_second": 180.953, |
|
"eval_steps_per_second": 11.366, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 1.45496666431427, |
|
"learning_rate": 0.0024770642201834866, |
|
"loss": 0.7024, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"grad_norm": 0.8751833438873291, |
|
"learning_rate": 0.002431192660550459, |
|
"loss": 0.741, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"grad_norm": 0.7062868475914001, |
|
"learning_rate": 0.0023853211009174312, |
|
"loss": 0.6733, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 0.9199981689453125, |
|
"learning_rate": 0.002339449541284404, |
|
"loss": 0.6988, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"grad_norm": 0.8544884324073792, |
|
"learning_rate": 0.0022935779816513763, |
|
"loss": 0.7073, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"grad_norm": 1.0523695945739746, |
|
"learning_rate": 0.002247706422018349, |
|
"loss": 0.6822, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"grad_norm": 0.866678774356842, |
|
"learning_rate": 0.0022018348623853213, |
|
"loss": 0.6784, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"grad_norm": 0.6989203095436096, |
|
"learning_rate": 0.0021559633027522936, |
|
"loss": 0.6796, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 0.4832663834095001, |
|
"learning_rate": 0.0021100917431192663, |
|
"loss": 0.6472, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 1.1230007410049438, |
|
"learning_rate": 0.0020642201834862386, |
|
"loss": 0.6693, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"grad_norm": 0.6341413259506226, |
|
"learning_rate": 0.002018348623853211, |
|
"loss": 0.6363, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7676969092721835, |
|
"eval_f1": 0.5121263559819323, |
|
"eval_loss": 0.6328557133674622, |
|
"eval_precision": 0.6350277126119822, |
|
"eval_recall": 0.49284577902844884, |
|
"eval_runtime": 5.4702, |
|
"eval_samples_per_second": 183.357, |
|
"eval_steps_per_second": 11.517, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.7254393696784973, |
|
"learning_rate": 0.0019724770642201837, |
|
"loss": 0.6398, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.6462991237640381, |
|
"learning_rate": 0.0019266055045871562, |
|
"loss": 0.6703, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"grad_norm": 0.7446946501731873, |
|
"learning_rate": 0.0018807339449541285, |
|
"loss": 0.6184, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"grad_norm": 0.5196910500526428, |
|
"learning_rate": 0.001834862385321101, |
|
"loss": 0.6308, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"grad_norm": 1.019028902053833, |
|
"learning_rate": 0.0017889908256880735, |
|
"loss": 0.664, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 1.3352869749069214, |
|
"learning_rate": 0.001743119266055046, |
|
"loss": 0.638, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"grad_norm": 0.8105664253234863, |
|
"learning_rate": 0.0016972477064220186, |
|
"loss": 0.5801, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 1.0205323696136475, |
|
"learning_rate": 0.0016513761467889909, |
|
"loss": 0.6923, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"grad_norm": 0.48183074593544006, |
|
"learning_rate": 0.0016055045871559634, |
|
"loss": 0.6583, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"grad_norm": 0.7392696738243103, |
|
"learning_rate": 0.001559633027522936, |
|
"loss": 0.6784, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"grad_norm": 0.7750418782234192, |
|
"learning_rate": 0.0015137614678899084, |
|
"loss": 0.6299, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7716849451645065, |
|
"eval_f1": 0.5713473581488847, |
|
"eval_loss": 0.6116508841514587, |
|
"eval_precision": 0.5962341473849692, |
|
"eval_recall": 0.5781145727281558, |
|
"eval_runtime": 5.4791, |
|
"eval_samples_per_second": 183.06, |
|
"eval_steps_per_second": 11.498, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 1.6914126873016357, |
|
"learning_rate": 0.001467889908256881, |
|
"loss": 0.7058, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 1.1266179084777832, |
|
"learning_rate": 0.0014220183486238532, |
|
"loss": 0.5969, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"grad_norm": 0.8198122978210449, |
|
"learning_rate": 0.0013761467889908258, |
|
"loss": 0.6296, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"grad_norm": 0.8668613433837891, |
|
"learning_rate": 0.0013302752293577983, |
|
"loss": 0.6779, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"grad_norm": 0.757351279258728, |
|
"learning_rate": 0.0012844036697247708, |
|
"loss": 0.6589, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": 1.231268286705017, |
|
"learning_rate": 0.0012385321100917433, |
|
"loss": 0.5909, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"grad_norm": 0.6386240720748901, |
|
"learning_rate": 0.0011926605504587156, |
|
"loss": 0.6166, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"grad_norm": 0.7646284103393555, |
|
"learning_rate": 0.0011467889908256881, |
|
"loss": 0.5815, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"grad_norm": 1.1417447328567505, |
|
"learning_rate": 0.0011009174311926607, |
|
"loss": 0.5825, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"grad_norm": 0.8213087916374207, |
|
"learning_rate": 0.0010550458715596332, |
|
"loss": 0.652, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": 0.8747499585151672, |
|
"learning_rate": 0.0010091743119266055, |
|
"loss": 0.6011, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7796610169491526, |
|
"eval_f1": 0.5902452724888688, |
|
"eval_loss": 0.5919255018234253, |
|
"eval_precision": 0.6162323714867404, |
|
"eval_recall": 0.575685794889542, |
|
"eval_runtime": 5.3294, |
|
"eval_samples_per_second": 188.202, |
|
"eval_steps_per_second": 11.821, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.9114809632301331, |
|
"learning_rate": 0.0009633027522935781, |
|
"loss": 0.6365, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"grad_norm": 0.7782912254333496, |
|
"learning_rate": 0.0009174311926605505, |
|
"loss": 0.5558, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"grad_norm": 1.2907453775405884, |
|
"learning_rate": 0.000871559633027523, |
|
"loss": 0.5756, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 1.1318057775497437, |
|
"learning_rate": 0.0008256880733944954, |
|
"loss": 0.6574, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.5756754875183105, |
|
"learning_rate": 0.000779816513761468, |
|
"loss": 0.5494, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"grad_norm": 0.7023201584815979, |
|
"learning_rate": 0.0007339449541284405, |
|
"loss": 0.6101, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"grad_norm": 0.9913588166236877, |
|
"learning_rate": 0.0006880733944954129, |
|
"loss": 0.6034, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"grad_norm": 0.982480525970459, |
|
"learning_rate": 0.0006422018348623854, |
|
"loss": 0.5981, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"grad_norm": 1.1374191045761108, |
|
"learning_rate": 0.0005963302752293578, |
|
"loss": 0.6134, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"grad_norm": 0.7602503299713135, |
|
"learning_rate": 0.0005504587155963303, |
|
"loss": 0.5561, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"grad_norm": 0.8967719674110413, |
|
"learning_rate": 0.0005045871559633027, |
|
"loss": 0.6043, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7946161515453639, |
|
"eval_f1": 0.5983005873522603, |
|
"eval_loss": 0.5475942492485046, |
|
"eval_precision": 0.6295002331514908, |
|
"eval_recall": 0.5812943349243114, |
|
"eval_runtime": 5.2749, |
|
"eval_samples_per_second": 190.145, |
|
"eval_steps_per_second": 11.943, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.9292692542076111, |
|
"learning_rate": 0.00045871559633027525, |
|
"loss": 0.5415, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"grad_norm": 0.8102772235870361, |
|
"learning_rate": 0.0004128440366972477, |
|
"loss": 0.5611, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"grad_norm": 1.1832762956619263, |
|
"learning_rate": 0.00036697247706422024, |
|
"loss": 0.5876, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"grad_norm": 1.1258927583694458, |
|
"learning_rate": 0.0003211009174311927, |
|
"loss": 0.4946, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 0.8979453444480896, |
|
"learning_rate": 0.00027522935779816516, |
|
"loss": 0.5668, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 0.9506226778030396, |
|
"learning_rate": 0.00022935779816513763, |
|
"loss": 0.5384, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"grad_norm": 1.1283026933670044, |
|
"learning_rate": 0.00018348623853211012, |
|
"loss": 0.5772, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 0.9244889616966248, |
|
"learning_rate": 0.00013761467889908258, |
|
"loss": 0.5662, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"grad_norm": 0.8923238515853882, |
|
"learning_rate": 9.174311926605506e-05, |
|
"loss": 0.5921, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"grad_norm": 1.1653807163238525, |
|
"learning_rate": 4.587155963302753e-05, |
|
"loss": 0.5846, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"grad_norm": 0.8477634191513062, |
|
"learning_rate": 0.0, |
|
"loss": 0.5671, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.7956131605184447, |
|
"eval_f1": 0.5997799904348683, |
|
"eval_loss": 0.5543830990791321, |
|
"eval_precision": 0.627341257239154, |
|
"eval_recall": 0.5810367281093276, |
|
"eval_runtime": 5.4717, |
|
"eval_samples_per_second": 183.308, |
|
"eval_steps_per_second": 11.514, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"step": 1090, |
|
"total_flos": 5.442882169274339e+18, |
|
"train_loss": 0.7150778630457887, |
|
"train_runtime": 818.9501, |
|
"train_samples_per_second": 85.561, |
|
"train_steps_per_second": 1.331 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1090, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 5.442882169274339e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|