yukidump's picture
ppllava_vicuna_final
67efb10
raw
history blame
20.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999115122555526,
"eval_steps": 500,
"global_step": 5650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008848774444739404,
"grad_norm": 1.8227072749903463,
"learning_rate": 5.882352941176471e-06,
"loss": 1.434,
"step": 50
},
{
"epoch": 0.017697548889478807,
"grad_norm": 1.593675125457607,
"learning_rate": 1.1764705882352942e-05,
"loss": 1.1202,
"step": 100
},
{
"epoch": 0.026546323334218212,
"grad_norm": 1.5547518309505592,
"learning_rate": 1.7647058823529414e-05,
"loss": 1.0971,
"step": 150
},
{
"epoch": 0.035395097778957614,
"grad_norm": 1.3952753982651918,
"learning_rate": 1.9998521094455198e-05,
"loss": 1.0463,
"step": 200
},
{
"epoch": 0.044243872223697016,
"grad_norm": 1.6576864383381864,
"learning_rate": 1.9989484922416503e-05,
"loss": 1.0808,
"step": 250
},
{
"epoch": 0.053092646668436425,
"grad_norm": 1.340907083636223,
"learning_rate": 1.9972241607451552e-05,
"loss": 1.0342,
"step": 300
},
{
"epoch": 0.061941421113175826,
"grad_norm": 1.269578433993962,
"learning_rate": 1.9946805316291817e-05,
"loss": 1.0199,
"step": 350
},
{
"epoch": 0.07079019555791523,
"grad_norm": 1.2537035911999723,
"learning_rate": 1.9913196946839304e-05,
"loss": 1.0137,
"step": 400
},
{
"epoch": 0.07963897000265463,
"grad_norm": 1.1857124728289088,
"learning_rate": 1.987144411099731e-05,
"loss": 1.0133,
"step": 450
},
{
"epoch": 0.08848774444739403,
"grad_norm": 1.2181869575632758,
"learning_rate": 1.9821581111985072e-05,
"loss": 1.0178,
"step": 500
},
{
"epoch": 0.09733651889213343,
"grad_norm": 1.2987295471871965,
"learning_rate": 1.9763648916154982e-05,
"loss": 1.0127,
"step": 550
},
{
"epoch": 0.10618529333687285,
"grad_norm": 1.2413868753158877,
"learning_rate": 1.9697695119335547e-05,
"loss": 0.9979,
"step": 600
},
{
"epoch": 0.11503406778161225,
"grad_norm": 1.2626131743513744,
"learning_rate": 1.9623773907727682e-05,
"loss": 0.9965,
"step": 650
},
{
"epoch": 0.12388284222635165,
"grad_norm": 1.3730231292537942,
"learning_rate": 1.954194601338651e-05,
"loss": 0.9942,
"step": 700
},
{
"epoch": 0.13273161667109104,
"grad_norm": 1.2218007272454348,
"learning_rate": 1.9452278664325227e-05,
"loss": 1.0036,
"step": 750
},
{
"epoch": 0.14158039111583046,
"grad_norm": 1.1454037410098823,
"learning_rate": 1.9354845529282042e-05,
"loss": 0.9868,
"step": 800
},
{
"epoch": 0.15042916556056987,
"grad_norm": 1.199534918146064,
"learning_rate": 1.9249726657195534e-05,
"loss": 0.9972,
"step": 850
},
{
"epoch": 0.15927794000530926,
"grad_norm": 1.2920998134175072,
"learning_rate": 1.9137008411438213e-05,
"loss": 1.0239,
"step": 900
},
{
"epoch": 0.16812671445004868,
"grad_norm": 1.1321328064281995,
"learning_rate": 1.901678339886223e-05,
"loss": 0.9807,
"step": 950
},
{
"epoch": 0.17697548889478806,
"grad_norm": 1.1146456739633037,
"learning_rate": 1.8889150393715627e-05,
"loss": 0.981,
"step": 1000
},
{
"epoch": 0.18582426333952748,
"grad_norm": 1.161097648736237,
"learning_rate": 1.8754214256491564e-05,
"loss": 0.9826,
"step": 1050
},
{
"epoch": 0.19467303778426687,
"grad_norm": 1.2010813609189326,
"learning_rate": 1.8612085847777215e-05,
"loss": 0.9846,
"step": 1100
},
{
"epoch": 0.20352181222900628,
"grad_norm": 1.203856802982565,
"learning_rate": 1.8462881937173144e-05,
"loss": 0.9789,
"step": 1150
},
{
"epoch": 0.2123705866737457,
"grad_norm": 1.1809801509975393,
"learning_rate": 1.8306725107357933e-05,
"loss": 0.9785,
"step": 1200
},
{
"epoch": 0.22121936111848509,
"grad_norm": 1.1856255544481202,
"learning_rate": 1.8143743653376944e-05,
"loss": 0.9724,
"step": 1250
},
{
"epoch": 0.2300681355632245,
"grad_norm": 1.2932019902094527,
"learning_rate": 1.7974071477237887e-05,
"loss": 0.9741,
"step": 1300
},
{
"epoch": 0.2389169100079639,
"grad_norm": 1.1399596376970142,
"learning_rate": 1.7797847977899873e-05,
"loss": 0.9787,
"step": 1350
},
{
"epoch": 0.2477656844527033,
"grad_norm": 1.1851681853908578,
"learning_rate": 1.7615217936746246e-05,
"loss": 0.9712,
"step": 1400
},
{
"epoch": 0.2566144588974427,
"grad_norm": 1.212090367995841,
"learning_rate": 1.742633139863538e-05,
"loss": 0.9729,
"step": 1450
},
{
"epoch": 0.2654632333421821,
"grad_norm": 1.0975454688592081,
"learning_rate": 1.7231343548627085e-05,
"loss": 0.9714,
"step": 1500
},
{
"epoch": 0.2743120077869215,
"grad_norm": 1.0110033370546834,
"learning_rate": 1.7030414584485938e-05,
"loss": 0.9591,
"step": 1550
},
{
"epoch": 0.2831607822316609,
"grad_norm": 1.0352711739713445,
"learning_rate": 1.6823709585066308e-05,
"loss": 0.9719,
"step": 1600
},
{
"epoch": 0.29200955667640033,
"grad_norm": 1.1174206790465606,
"learning_rate": 1.6611398374687172e-05,
"loss": 0.9673,
"step": 1650
},
{
"epoch": 0.30085833112113974,
"grad_norm": 1.1508488673423878,
"learning_rate": 1.6393655383608132e-05,
"loss": 0.9579,
"step": 1700
},
{
"epoch": 0.3097071055658791,
"grad_norm": 1.1140112909261894,
"learning_rate": 1.6170659504721365e-05,
"loss": 0.9773,
"step": 1750
},
{
"epoch": 0.3185558800106185,
"grad_norm": 1.078883305222083,
"learning_rate": 1.594259394657707e-05,
"loss": 0.963,
"step": 1800
},
{
"epoch": 0.32740465445535794,
"grad_norm": 1.0741496670790676,
"learning_rate": 1.570964608286336e-05,
"loss": 0.9665,
"step": 1850
},
{
"epoch": 0.33625342890009735,
"grad_norm": 1.0674741658785543,
"learning_rate": 1.5472007298464117e-05,
"loss": 0.9577,
"step": 1900
},
{
"epoch": 0.34510220334483677,
"grad_norm": 1.1266524576573997,
"learning_rate": 1.5229872832221336e-05,
"loss": 0.9578,
"step": 1950
},
{
"epoch": 0.3539509777895761,
"grad_norm": 1.0507368907995636,
"learning_rate": 1.4983441616531152e-05,
"loss": 0.9543,
"step": 2000
},
{
"epoch": 0.36279975223431554,
"grad_norm": 1.5910538700413814,
"learning_rate": 1.4732916113905336e-05,
"loss": 0.9499,
"step": 2050
},
{
"epoch": 0.37164852667905496,
"grad_norm": 1.1710473762069435,
"learning_rate": 1.4478502150632503e-05,
"loss": 0.9928,
"step": 2100
},
{
"epoch": 0.3804973011237944,
"grad_norm": 1.1721776444324115,
"learning_rate": 1.4220408747675714e-05,
"loss": 0.9509,
"step": 2150
},
{
"epoch": 0.38934607556853373,
"grad_norm": 1.1265584958834658,
"learning_rate": 1.3958847948945428e-05,
"loss": 0.9437,
"step": 2200
},
{
"epoch": 0.39819485001327315,
"grad_norm": 1.1519073177115475,
"learning_rate": 1.369403464708884e-05,
"loss": 0.9445,
"step": 2250
},
{
"epoch": 0.40704362445801257,
"grad_norm": 1.1172434119258432,
"learning_rate": 1.3426186406938769e-05,
"loss": 1.0387,
"step": 2300
},
{
"epoch": 0.415892398902752,
"grad_norm": 1.138922531256483,
"learning_rate": 1.315552328676714e-05,
"loss": 0.9391,
"step": 2350
},
{
"epoch": 0.4247411733474914,
"grad_norm": 1.089137186693905,
"learning_rate": 1.2882267657489908e-05,
"loss": 0.9457,
"step": 2400
},
{
"epoch": 0.43358994779223076,
"grad_norm": 1.0358420925020666,
"learning_rate": 1.2606644019971967e-05,
"loss": 0.9972,
"step": 2450
},
{
"epoch": 0.44243872223697017,
"grad_norm": 1.0748089642780165,
"learning_rate": 1.2328878820582122e-05,
"loss": 0.926,
"step": 2500
},
{
"epoch": 0.4512874966817096,
"grad_norm": 1.1178495139589024,
"learning_rate": 1.204920026514971e-05,
"loss": 0.9371,
"step": 2550
},
{
"epoch": 0.460136271126449,
"grad_norm": 1.0570225052003097,
"learning_rate": 1.1767838131475654e-05,
"loss": 0.9299,
"step": 2600
},
{
"epoch": 0.46898504557118836,
"grad_norm": 1.198704612437538,
"learning_rate": 1.1485023580552039e-05,
"loss": 0.9333,
"step": 2650
},
{
"epoch": 0.4778338200159278,
"grad_norm": 1.2153247727284249,
"learning_rate": 1.1200988966645286e-05,
"loss": 0.9325,
"step": 2700
},
{
"epoch": 0.4866825944606672,
"grad_norm": 1.0862037277462553,
"learning_rate": 1.091596764639895e-05,
"loss": 0.9341,
"step": 2750
},
{
"epoch": 0.4955313689054066,
"grad_norm": 1.0724182576148855,
"learning_rate": 1.0630193787112994e-05,
"loss": 0.9063,
"step": 2800
},
{
"epoch": 0.504380143350146,
"grad_norm": 1.0396985853342051,
"learning_rate": 1.034390217435704e-05,
"loss": 0.9293,
"step": 2850
},
{
"epoch": 0.5132289177948854,
"grad_norm": 1.0749902902208996,
"learning_rate": 1.005732801907567e-05,
"loss": 0.9214,
"step": 2900
},
{
"epoch": 0.5220776922396249,
"grad_norm": 1.085293805471844,
"learning_rate": 9.770706764344235e-06,
"loss": 1.0245,
"step": 2950
},
{
"epoch": 0.5309264666843642,
"grad_norm": 1.1358464620386077,
"learning_rate": 9.484273891933982e-06,
"loss": 0.9297,
"step": 3000
},
{
"epoch": 0.5397752411291036,
"grad_norm": 1.102492622904414,
"learning_rate": 9.198264728845332e-06,
"loss": 0.9157,
"step": 3050
},
{
"epoch": 0.548624015573843,
"grad_norm": 1.1636978192620964,
"learning_rate": 8.912914253968391e-06,
"loss": 0.9236,
"step": 3100
},
{
"epoch": 0.5574727900185824,
"grad_norm": 1.1333308119371828,
"learning_rate": 8.628456905029383e-06,
"loss": 0.9158,
"step": 3150
},
{
"epoch": 0.5663215644633218,
"grad_norm": 1.117341944549429,
"learning_rate": 8.345126385981737e-06,
"loss": 0.9102,
"step": 3200
},
{
"epoch": 0.5751703389080612,
"grad_norm": 1.0840357862773122,
"learning_rate": 8.063155475000037e-06,
"loss": 0.9546,
"step": 3250
},
{
"epoch": 0.5840191133528007,
"grad_norm": 1.1791446775850642,
"learning_rate": 7.782775833234522e-06,
"loss": 0.924,
"step": 3300
},
{
"epoch": 0.5928678877975401,
"grad_norm": 1.1948088039686837,
"learning_rate": 7.504217814483364e-06,
"loss": 0.9135,
"step": 3350
},
{
"epoch": 0.6017166622422795,
"grad_norm": 1.107584300567853,
"learning_rate": 7.227710275938987e-06,
"loss": 0.9088,
"step": 3400
},
{
"epoch": 0.6105654366870189,
"grad_norm": 1.167878367521536,
"learning_rate": 6.953480390164001e-06,
"loss": 0.9394,
"step": 3450
},
{
"epoch": 0.6194142111317582,
"grad_norm": 1.0387264654842252,
"learning_rate": 6.68175345845119e-06,
"loss": 0.9022,
"step": 3500
},
{
"epoch": 0.6282629855764976,
"grad_norm": 1.2650051247263,
"learning_rate": 6.412752725720864e-06,
"loss": 0.9135,
"step": 3550
},
{
"epoch": 0.637111760021237,
"grad_norm": 1.20136444284978,
"learning_rate": 6.146699197107715e-06,
"loss": 0.9068,
"step": 3600
},
{
"epoch": 0.6459605344659765,
"grad_norm": 1.0811422393549932,
"learning_rate": 5.883811456387821e-06,
"loss": 0.9082,
"step": 3650
},
{
"epoch": 0.6548093089107159,
"grad_norm": 1.1656545210876348,
"learning_rate": 5.6243054863949675e-06,
"loss": 0.8898,
"step": 3700
},
{
"epoch": 0.6636580833554553,
"grad_norm": 1.1852994908957295,
"learning_rate": 5.368394491573876e-06,
"loss": 0.9026,
"step": 3750
},
{
"epoch": 0.6725068578001947,
"grad_norm": 1.161821140479422,
"learning_rate": 5.116288722816087e-06,
"loss": 0.8838,
"step": 3800
},
{
"epoch": 0.6813556322449341,
"grad_norm": 1.2037332656822164,
"learning_rate": 4.868195304722391e-06,
"loss": 0.9025,
"step": 3850
},
{
"epoch": 0.6902044066896735,
"grad_norm": 1.1242488366634837,
"learning_rate": 4.6243180654337975e-06,
"loss": 0.931,
"step": 3900
},
{
"epoch": 0.6990531811344128,
"grad_norm": 1.1696361772435924,
"learning_rate": 4.384857369170772e-06,
"loss": 0.9338,
"step": 3950
},
{
"epoch": 0.7079019555791523,
"grad_norm": 1.2264055771278481,
"learning_rate": 4.1500099516183555e-06,
"loss": 0.8993,
"step": 4000
},
{
"epoch": 0.7167507300238917,
"grad_norm": 1.1225371954007977,
"learning_rate": 3.919968758292425e-06,
"loss": 0.9044,
"step": 4050
},
{
"epoch": 0.7255995044686311,
"grad_norm": 1.1128045918827218,
"learning_rate": 3.6949227860198712e-06,
"loss": 0.8963,
"step": 4100
},
{
"epoch": 0.7344482789133705,
"grad_norm": 1.1426952334649678,
"learning_rate": 3.475056927662912e-06,
"loss": 0.8955,
"step": 4150
},
{
"epoch": 0.7432970533581099,
"grad_norm": 1.0853524038336615,
"learning_rate": 3.2605518202151577e-06,
"loss": 0.8973,
"step": 4200
},
{
"epoch": 0.7521458278028493,
"grad_norm": 1.1322485210453683,
"learning_rate": 3.0515836963942056e-06,
"loss": 0.8944,
"step": 4250
},
{
"epoch": 0.7609946022475887,
"grad_norm": 1.2182044740120312,
"learning_rate": 2.8483242398526723e-06,
"loss": 0.8872,
"step": 4300
},
{
"epoch": 0.7698433766923282,
"grad_norm": 1.0357920295682677,
"learning_rate": 2.650940444126654e-06,
"loss": 0.8856,
"step": 4350
},
{
"epoch": 0.7786921511370675,
"grad_norm": 0.984081860446035,
"learning_rate": 2.4595944754374723e-06,
"loss": 0.8818,
"step": 4400
},
{
"epoch": 0.7875409255818069,
"grad_norm": 1.1062257149827126,
"learning_rate": 2.27444353945945e-06,
"loss": 0.8883,
"step": 4450
},
{
"epoch": 0.7963897000265463,
"grad_norm": 1.1408547470520316,
"learning_rate": 2.0956397521631666e-06,
"loss": 0.8729,
"step": 4500
},
{
"epoch": 0.8052384744712857,
"grad_norm": 1.2286350695548351,
"learning_rate": 1.9233300148402767e-06,
"loss": 0.8782,
"step": 4550
},
{
"epoch": 0.8140872489160251,
"grad_norm": 1.1745122607432803,
"learning_rate": 1.757655893412622e-06,
"loss": 0.8763,
"step": 4600
},
{
"epoch": 0.8229360233607645,
"grad_norm": 1.291525874104284,
"learning_rate": 1.5987535021247668e-06,
"loss": 0.8817,
"step": 4650
},
{
"epoch": 0.831784797805504,
"grad_norm": 1.1484560802799162,
"learning_rate": 1.4467533917154842e-06,
"loss": 0.8914,
"step": 4700
},
{
"epoch": 0.8406335722502434,
"grad_norm": 1.0916994218547142,
"learning_rate": 1.3017804421601298e-06,
"loss": 0.9154,
"step": 4750
},
{
"epoch": 0.8494823466949828,
"grad_norm": 1.2124618779904544,
"learning_rate": 1.1639537600719764e-06,
"loss": 0.8821,
"step": 4800
},
{
"epoch": 0.8583311211397221,
"grad_norm": 1.1869802843495634,
"learning_rate": 1.0333865808468203e-06,
"loss": 0.8821,
"step": 4850
},
{
"epoch": 0.8671798955844615,
"grad_norm": 1.134603833533901,
"learning_rate": 9.101861756312369e-07,
"loss": 0.8799,
"step": 4900
},
{
"epoch": 0.8760286700292009,
"grad_norm": 1.23950870941106,
"learning_rate": 7.944537631909666e-07,
"loss": 0.8874,
"step": 4950
},
{
"epoch": 0.8848774444739403,
"grad_norm": 1.0710206299690663,
"learning_rate": 6.862844267517643e-07,
"loss": 0.9178,
"step": 5000
},
{
"epoch": 0.8937262189186798,
"grad_norm": 1.1079780487199702,
"learning_rate": 5.857670358811096e-07,
"loss": 0.9139,
"step": 5050
},
{
"epoch": 0.9025749933634192,
"grad_norm": 1.224157319904236,
"learning_rate": 4.929841734749063e-07,
"loss": 0.883,
"step": 5100
},
{
"epoch": 0.9114237678081586,
"grad_norm": 1.2973098944898664,
"learning_rate": 4.0801206790916815e-07,
"loss": 0.8748,
"step": 5150
},
{
"epoch": 0.920272542252898,
"grad_norm": 1.2788191274563776,
"learning_rate": 3.309205304124552e-07,
"loss": 0.9109,
"step": 5200
},
{
"epoch": 0.9291213166976374,
"grad_norm": 1.1179855803922911,
"learning_rate": 2.6177289771049274e-07,
"loss": 0.8944,
"step": 5250
},
{
"epoch": 0.9379700911423767,
"grad_norm": 1.1595081612750888,
"learning_rate": 2.0062597999009114e-07,
"loss": 0.8857,
"step": 5300
},
{
"epoch": 0.9468188655871161,
"grad_norm": 1.0486044192293602,
"learning_rate": 1.4753001422514125e-07,
"loss": 0.8827,
"step": 5350
},
{
"epoch": 0.9556676400318556,
"grad_norm": 1.1477897530038004,
"learning_rate": 1.0252862290301092e-07,
"loss": 0.8769,
"step": 5400
},
{
"epoch": 0.964516414476595,
"grad_norm": 1.1863014296064434,
"learning_rate": 6.565877818526245e-08,
"loss": 0.8754,
"step": 5450
},
{
"epoch": 0.9733651889213344,
"grad_norm": 1.2034312267234064,
"learning_rate": 3.6950771532126004e-08,
"loss": 0.8723,
"step": 5500
},
{
"epoch": 0.9822139633660738,
"grad_norm": 1.1269276845827294,
"learning_rate": 1.6428188815703627e-08,
"loss": 0.9178,
"step": 5550
},
{
"epoch": 0.9910627378108132,
"grad_norm": 1.2360290358413641,
"learning_rate": 4.1078909423253325e-09,
"loss": 0.8848,
"step": 5600
},
{
"epoch": 0.9999115122555526,
"grad_norm": 1.271675294761002,
"learning_rate": 0.0,
"loss": 0.8882,
"step": 5650
},
{
"epoch": 0.9999115122555526,
"step": 5650,
"total_flos": 3.3418410989715456e+16,
"train_loss": 0.94595458984375,
"train_runtime": 88600.3448,
"train_samples_per_second": 0.51,
"train_steps_per_second": 0.064
}
],
"logging_steps": 50,
"max_steps": 5650,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 3.3418410989715456e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}