diff --git "a/job_new.json" "b/job_new.json" --- "a/job_new.json" +++ "b/job_new.json" @@ -14,14 +14,14 @@ "rope_alpha": null, "output_measurement": null, "fast_safetensors": false, - "progress": "quant", + "progress": "finished", "status_output": false, "cal_filename": "google/gemma-2-2b-EXL2/cal_data.safetensors", "last_module_idx": 54, "measurement": { "model.layers.0.self_attn": [ { - "accuracy": 0.9515928328037262, + "accuracy": 0.9515531063079834, "total_bits": 30308928, "q_proj": { "group_size": { @@ -85,7 +85,7 @@ } }, { - "accuracy": 0.953981950879097, + "accuracy": 0.9539748653769493, "total_bits": 31455808, "q_proj": { "group_size": { @@ -149,7 +149,7 @@ } }, { - "accuracy": 0.9591670855879784, + "accuracy": 0.9591861926019192, "total_bits": 33412832, "q_proj": { "group_size": { @@ -213,7 +213,7 @@ } }, { - "accuracy": 0.9699935875833035, + "accuracy": 0.9699958208948374, "total_bits": 37983200, "q_proj": { "group_size": { @@ -277,7 +277,7 @@ } }, { - "accuracy": 0.976397393271327, + "accuracy": 0.9763969834893942, "total_bits": 44838176, "q_proj": { "group_size": { @@ -341,7 +341,7 @@ } }, { - "accuracy": 0.9767184294760227, + "accuracy": 0.9767143614590168, "total_bits": 44912768, "q_proj": { "group_size": { @@ -405,7 +405,7 @@ } }, { - "accuracy": 0.986974217928946, + "accuracy": 0.9869659701362252, "total_bits": 57355552, "q_proj": { "group_size": { @@ -457,7 +457,7 @@ } }, { - "accuracy": 0.9872219935059547, + "accuracy": 0.9872188298031688, "total_bits": 57430144, "q_proj": { "group_size": { @@ -509,7 +509,7 @@ } }, { - "accuracy": 0.9881364433094859, + "accuracy": 0.9881322896108031, "total_bits": 57950464, "q_proj": { "group_size": { @@ -561,7 +561,7 @@ } }, { - "accuracy": 0.9888399662449956, + "accuracy": 0.9888321068137884, "total_bits": 58692736, "q_proj": { "group_size": { @@ -613,7 +613,7 @@ } }, { - "accuracy": 0.9880855781957507, + "accuracy": 0.9880868950858712, "total_bits": 59068544, "q_proj": { "group_size": { @@ -677,7 +677,7 @@ } }, { - "accuracy": 0.9889282267540693, + "accuracy": 0.9889282938092947, "total_bits": 59588864, "q_proj": { "group_size": { @@ -741,7 +741,7 @@ } }, { - "accuracy": 0.9899680046364665, + "accuracy": 0.9899678928777575, "total_bits": 61536832, "q_proj": { "group_size": { @@ -802,7 +802,7 @@ } }, { - "accuracy": 0.9907521335408092, + "accuracy": 0.9907488031312823, "total_bits": 62354560, "q_proj": { "group_size": { @@ -863,7 +863,7 @@ } }, { - "accuracy": 0.9944849004969001, + "accuracy": 0.9944888786412776, "total_bits": 75246880, "q_proj": { "group_size": { @@ -924,7 +924,7 @@ } }, { - "accuracy": 0.9953417172655463, + "accuracy": 0.9953421992249787, "total_bits": 76510336, "q_proj": { "group_size": { @@ -985,7 +985,7 @@ } }, { - "accuracy": 0.9965903947595507, + "accuracy": 0.9965901931282133, "total_bits": 85667104, "q_proj": { "group_size": { @@ -1037,7 +1037,7 @@ } }, { - "accuracy": 0.9975632540881634, + "accuracy": 0.9975674538873136, "total_bits": 91722880, "q_proj": { "group_size": { @@ -1089,7 +1089,7 @@ } }, { - "accuracy": 0.9990884908474982, + "accuracy": 0.9990892710047774, "total_bits": 113978656, "q_proj": { "group_size": { @@ -1143,7 +1143,7 @@ ], "model.layers.0.mlp": [ { - "accuracy": 0.8881748169660568, + "accuracy": 0.8882661387324333, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -1195,7 +1195,7 @@ } }, { - "accuracy": 0.8919973969459534, + "accuracy": 0.8919670283794403, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -1247,7 +1247,7 @@ } }, { - "accuracy": 0.9069116935133934, + "accuracy": 0.9069375917315483, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -1296,7 +1296,7 @@ } }, { - "accuracy": 0.9124017059803009, + "accuracy": 0.9124317467212677, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -1345,7 +1345,7 @@ } }, { - "accuracy": 0.9430184848606586, + "accuracy": 0.9430231153964996, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -1397,7 +1397,7 @@ } }, { - "accuracy": 0.9480531290173531, + "accuracy": 0.9480909705162048, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -1449,7 +1449,7 @@ } }, { - "accuracy": 0.9544955678284168, + "accuracy": 0.9545128084719181, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -1498,7 +1498,7 @@ } }, { - "accuracy": 0.968281302601099, + "accuracy": 0.9682874381542206, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -1541,7 +1541,7 @@ } }, { - "accuracy": 0.9713601395487785, + "accuracy": 0.971364825963974, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -1584,7 +1584,7 @@ } }, { - "accuracy": 0.9707059450447559, + "accuracy": 0.9707299135625362, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -1636,7 +1636,7 @@ } }, { - "accuracy": 0.9745050817728043, + "accuracy": 0.974507249891758, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -1688,7 +1688,7 @@ } }, { - "accuracy": 0.9848953913897276, + "accuracy": 0.9848882537335157, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -1740,7 +1740,7 @@ } }, { - "accuracy": 0.9872103426605463, + "accuracy": 0.9872004305943847, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -1792,7 +1792,7 @@ } }, { - "accuracy": 0.9911489551886916, + "accuracy": 0.9911568406969309, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -1835,7 +1835,7 @@ } }, { - "accuracy": 0.9923222842626274, + "accuracy": 0.9923209701664746, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -1884,7 +1884,7 @@ } }, { - "accuracy": 0.9939925689250231, + "accuracy": 0.9939967971295118, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -1930,7 +1930,7 @@ } }, { - "accuracy": 0.9975718185305595, + "accuracy": 0.9975708881393075, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -1972,7 +1972,7 @@ ], "model.layers.1.self_attn": [ { - "accuracy": 0.9072708263993263, + "accuracy": 0.9073722139000893, "total_bits": 30308928, "q_proj": { "group_size": { @@ -2036,7 +2036,7 @@ } }, { - "accuracy": 0.9114431366324425, + "accuracy": 0.91156817227602, "total_bits": 31455808, "q_proj": { "group_size": { @@ -2100,7 +2100,7 @@ } }, { - "accuracy": 0.9211425334215164, + "accuracy": 0.92121671885252, "total_bits": 33412832, "q_proj": { "group_size": { @@ -2164,7 +2164,7 @@ } }, { - "accuracy": 0.940546628087759, + "accuracy": 0.9407354593276978, "total_bits": 37983200, "q_proj": { "group_size": { @@ -2228,7 +2228,7 @@ } }, { - "accuracy": 0.9535201378166676, + "accuracy": 0.9535512514412403, "total_bits": 44838176, "q_proj": { "group_size": { @@ -2292,7 +2292,7 @@ } }, { - "accuracy": 0.9539150819182396, + "accuracy": 0.953955851495266, "total_bits": 44912768, "q_proj": { "group_size": { @@ -2356,7 +2356,7 @@ } }, { - "accuracy": 0.9746329039335251, + "accuracy": 0.9746336918324232, "total_bits": 57355552, "q_proj": { "group_size": { @@ -2408,7 +2408,7 @@ } }, { - "accuracy": 0.9750404637306929, + "accuracy": 0.9750551991164684, "total_bits": 57430144, "q_proj": { "group_size": { @@ -2460,7 +2460,7 @@ } }, { - "accuracy": 0.976738478988409, + "accuracy": 0.976717684417963, "total_bits": 57950464, "q_proj": { "group_size": { @@ -2512,7 +2512,7 @@ } }, { - "accuracy": 0.9780394490808249, + "accuracy": 0.9780480619519949, "total_bits": 58692736, "q_proj": { "group_size": { @@ -2564,7 +2564,7 @@ } }, { - "accuracy": 0.9763826839625835, + "accuracy": 0.9763856939971447, "total_bits": 59068544, "q_proj": { "group_size": { @@ -2628,7 +2628,7 @@ } }, { - "accuracy": 0.9780621360987425, + "accuracy": 0.9780414216220379, "total_bits": 59588864, "q_proj": { "group_size": { @@ -2692,7 +2692,7 @@ } }, { - "accuracy": 0.9801010321825743, + "accuracy": 0.9800688195973635, "total_bits": 61536832, "q_proj": { "group_size": { @@ -2753,7 +2753,7 @@ } }, { - "accuracy": 0.981600085273385, + "accuracy": 0.9815671853721142, "total_bits": 62354560, "q_proj": { "group_size": { @@ -2814,7 +2814,7 @@ } }, { - "accuracy": 0.9890466667711735, + "accuracy": 0.9890200421214104, "total_bits": 75246880, "q_proj": { "group_size": { @@ -2875,7 +2875,7 @@ } }, { - "accuracy": 0.9907460613176227, + "accuracy": 0.9907318344339728, "total_bits": 76510336, "q_proj": { "group_size": { @@ -2936,7 +2936,7 @@ } }, { - "accuracy": 0.9933391944505274, + "accuracy": 0.9933400782756507, "total_bits": 85667104, "q_proj": { "group_size": { @@ -2988,7 +2988,7 @@ } }, { - "accuracy": 0.9952039420604706, + "accuracy": 0.9951940830796957, "total_bits": 91722880, "q_proj": { "group_size": { @@ -3040,7 +3040,7 @@ } }, { - "accuracy": 0.9982604767428711, + "accuracy": 0.9982599728973582, "total_bits": 113978656, "q_proj": { "group_size": { @@ -3094,7 +3094,7 @@ ], "model.layers.1.mlp": [ { - "accuracy": 0.8869373872876167, + "accuracy": 0.887041375041008, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -3146,7 +3146,7 @@ } }, { - "accuracy": 0.891030453145504, + "accuracy": 0.8909792527556419, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -3198,7 +3198,7 @@ } }, { - "accuracy": 0.9027609676122665, + "accuracy": 0.9027382656931877, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -3247,7 +3247,7 @@ } }, { - "accuracy": 0.9062561988830566, + "accuracy": 0.9062387868762016, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -3296,7 +3296,7 @@ } }, { - "accuracy": 0.9437772929668427, + "accuracy": 0.943806029856205, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -3348,7 +3348,7 @@ } }, { - "accuracy": 0.9482904970645905, + "accuracy": 0.9483064226806164, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -3400,7 +3400,7 @@ } }, { - "accuracy": 0.952780719846487, + "accuracy": 0.9527781158685684, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -3449,7 +3449,7 @@ } }, { - "accuracy": 0.9698743838816881, + "accuracy": 0.9698769953101873, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -3492,7 +3492,7 @@ } }, { - "accuracy": 0.9721903912723064, + "accuracy": 0.9722175262868404, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -3535,7 +3535,7 @@ } }, { - "accuracy": 0.9712930209934711, + "accuracy": 0.9713117778301239, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -3587,7 +3587,7 @@ } }, { - "accuracy": 0.9747709520161152, + "accuracy": 0.9747742302715778, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -3639,7 +3639,7 @@ } }, { - "accuracy": 0.9853192111477256, + "accuracy": 0.9853277914226055, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -3691,7 +3691,7 @@ } }, { - "accuracy": 0.987357254140079, + "accuracy": 0.9873669799417257, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -3743,7 +3743,7 @@ } }, { - "accuracy": 0.9915956920012832, + "accuracy": 0.9915947569534183, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -3786,7 +3786,7 @@ } }, { - "accuracy": 0.9925124426372349, + "accuracy": 0.9925175970420241, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -3835,7 +3835,7 @@ } }, { - "accuracy": 0.9935486940667033, + "accuracy": 0.9935514670796692, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -3881,7 +3881,7 @@ } }, { - "accuracy": 0.9976398337166756, + "accuracy": 0.9976405743509531, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -3923,7 +3923,7 @@ ], "model.layers.2.self_attn": [ { - "accuracy": 0.8655510395765305, + "accuracy": 0.8655463457107544, "total_bits": 30308928, "q_proj": { "group_size": { @@ -3987,7 +3987,7 @@ } }, { - "accuracy": 0.8742070347070694, + "accuracy": 0.874164268374443, "total_bits": 31455808, "q_proj": { "group_size": { @@ -4051,7 +4051,7 @@ } }, { - "accuracy": 0.8933860436081886, + "accuracy": 0.8934662640094757, "total_bits": 33412832, "q_proj": { "group_size": { @@ -4115,7 +4115,7 @@ } }, { - "accuracy": 0.9261169582605362, + "accuracy": 0.926240935921669, "total_bits": 37983200, "q_proj": { "group_size": { @@ -4179,7 +4179,7 @@ } }, { - "accuracy": 0.933709405362606, + "accuracy": 0.9337229505181313, "total_bits": 44838176, "q_proj": { "group_size": { @@ -4243,7 +4243,7 @@ } }, { - "accuracy": 0.9347171932458878, + "accuracy": 0.9346923530101776, "total_bits": 44912768, "q_proj": { "group_size": { @@ -4307,7 +4307,7 @@ } }, { - "accuracy": 0.9634591825306416, + "accuracy": 0.9634601436555386, "total_bits": 57355552, "q_proj": { "group_size": { @@ -4359,7 +4359,7 @@ } }, { - "accuracy": 0.9642514251172543, + "accuracy": 0.9642136543989182, "total_bits": 57430144, "q_proj": { "group_size": { @@ -4411,7 +4411,7 @@ } }, { - "accuracy": 0.9666738845407963, + "accuracy": 0.9666857272386551, "total_bits": 57950464, "q_proj": { "group_size": { @@ -4463,7 +4463,7 @@ } }, { - "accuracy": 0.9683135040104389, + "accuracy": 0.9683275371789932, "total_bits": 58692736, "q_proj": { "group_size": { @@ -4515,7 +4515,7 @@ } }, { - "accuracy": 0.966770775616169, + "accuracy": 0.9667858779430389, "total_bits": 59068544, "q_proj": { "group_size": { @@ -4579,7 +4579,7 @@ } }, { - "accuracy": 0.9689657036215067, + "accuracy": 0.9689403381198645, "total_bits": 59588864, "q_proj": { "group_size": { @@ -4643,7 +4643,7 @@ } }, { - "accuracy": 0.9732588063925505, + "accuracy": 0.9732514582574368, "total_bits": 61536832, "q_proj": { "group_size": { @@ -4704,7 +4704,7 @@ } }, { - "accuracy": 0.9749700985848904, + "accuracy": 0.9749796185642481, "total_bits": 62354560, "q_proj": { "group_size": { @@ -4765,7 +4765,7 @@ } }, { - "accuracy": 0.985321925021708, + "accuracy": 0.9853210505098104, "total_bits": 75246880, "q_proj": { "group_size": { @@ -4826,7 +4826,7 @@ } }, { - "accuracy": 0.9874144792556763, + "accuracy": 0.9874111460521817, "total_bits": 76510336, "q_proj": { "group_size": { @@ -4887,7 +4887,7 @@ } }, { - "accuracy": 0.9904179144650698, + "accuracy": 0.9904154865071177, "total_bits": 85667104, "q_proj": { "group_size": { @@ -4939,7 +4939,7 @@ } }, { - "accuracy": 0.9935114970430732, + "accuracy": 0.9935085913166404, "total_bits": 91722880, "q_proj": { "group_size": { @@ -4991,7 +4991,7 @@ } }, { - "accuracy": 0.9975107992067933, + "accuracy": 0.9975118087604642, "total_bits": 113978656, "q_proj": { "group_size": { @@ -5045,7 +5045,7 @@ ], "model.layers.2.mlp": [ { - "accuracy": 0.8512221872806549, + "accuracy": 0.851137638092041, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -5097,7 +5097,7 @@ } }, { - "accuracy": 0.8556944578886032, + "accuracy": 0.8557578176259995, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -5149,7 +5149,7 @@ } }, { - "accuracy": 0.8742920905351639, + "accuracy": 0.8742973953485489, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -5198,7 +5198,7 @@ } }, { - "accuracy": 0.8798726350069046, + "accuracy": 0.8798764571547508, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -5247,7 +5247,7 @@ } }, { - "accuracy": 0.9260013550519943, + "accuracy": 0.9259674102067947, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -5299,7 +5299,7 @@ } }, { - "accuracy": 0.9317035898566246, + "accuracy": 0.9316787645220757, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -5351,7 +5351,7 @@ } }, { - "accuracy": 0.9391488991677761, + "accuracy": 0.9391309916973114, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -5400,7 +5400,7 @@ } }, { - "accuracy": 0.9611527696251869, + "accuracy": 0.9611707739531994, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -5443,7 +5443,7 @@ } }, { - "accuracy": 0.9643270559608936, + "accuracy": 0.964320108294487, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -5486,7 +5486,7 @@ } }, { - "accuracy": 0.9622061587870121, + "accuracy": 0.962238498032093, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -5538,7 +5538,7 @@ } }, { - "accuracy": 0.966707780957222, + "accuracy": 0.9667122215032578, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -5590,7 +5590,7 @@ } }, { - "accuracy": 0.9806768018752337, + "accuracy": 0.9806710071861744, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -5642,7 +5642,7 @@ } }, { - "accuracy": 0.9833411537110806, + "accuracy": 0.9833368640393019, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -5694,7 +5694,7 @@ } }, { - "accuracy": 0.9893169151619077, + "accuracy": 0.98932437133044, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -5737,7 +5737,7 @@ } }, { - "accuracy": 0.9900901559740305, + "accuracy": 0.990092589519918, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -5786,7 +5786,7 @@ } }, { - "accuracy": 0.9917803723365068, + "accuracy": 0.9917868180200458, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -5832,7 +5832,7 @@ } }, { - "accuracy": 0.997135940939188, + "accuracy": 0.9971363425720483, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -5874,7 +5874,7 @@ ], "model.layers.3.self_attn": [ { - "accuracy": 0.8845625817775726, + "accuracy": 0.8846139460802078, "total_bits": 30308928, "q_proj": { "group_size": { @@ -5938,7 +5938,7 @@ } }, { - "accuracy": 0.8901228457689285, + "accuracy": 0.8900518491864204, "total_bits": 31455808, "q_proj": { "group_size": { @@ -6002,7 +6002,7 @@ } }, { - "accuracy": 0.9061683937907219, + "accuracy": 0.9061124995350838, "total_bits": 33412832, "q_proj": { "group_size": { @@ -6066,7 +6066,7 @@ } }, { - "accuracy": 0.9347674697637558, + "accuracy": 0.9347528889775276, "total_bits": 37983200, "q_proj": { "group_size": { @@ -6130,7 +6130,7 @@ } }, { - "accuracy": 0.9422827735543251, + "accuracy": 0.9422792084515095, "total_bits": 44838176, "q_proj": { "group_size": { @@ -6194,7 +6194,7 @@ } }, { - "accuracy": 0.943145889788866, + "accuracy": 0.9431353025138378, "total_bits": 44912768, "q_proj": { "group_size": { @@ -6258,7 +6258,7 @@ } }, { - "accuracy": 0.9687017947435379, + "accuracy": 0.9687346704304218, "total_bits": 57355552, "q_proj": { "group_size": { @@ -6310,7 +6310,7 @@ } }, { - "accuracy": 0.969449121505022, + "accuracy": 0.9694707691669464, "total_bits": 57430144, "q_proj": { "group_size": { @@ -6362,7 +6362,7 @@ } }, { - "accuracy": 0.9714747574180365, + "accuracy": 0.9715053513646126, "total_bits": 57950464, "q_proj": { "group_size": { @@ -6414,7 +6414,7 @@ } }, { - "accuracy": 0.9727979246526957, + "accuracy": 0.9728143084794283, "total_bits": 58692736, "q_proj": { "group_size": { @@ -6466,7 +6466,7 @@ } }, { - "accuracy": 0.9711320530623198, + "accuracy": 0.9711146093904972, "total_bits": 59068544, "q_proj": { "group_size": { @@ -6530,7 +6530,7 @@ } }, { - "accuracy": 0.9730110038071871, + "accuracy": 0.9730188455432653, "total_bits": 59588864, "q_proj": { "group_size": { @@ -6594,7 +6594,7 @@ } }, { - "accuracy": 0.9766805954277515, + "accuracy": 0.9766947887837887, "total_bits": 61536832, "q_proj": { "group_size": { @@ -6655,7 +6655,7 @@ } }, { - "accuracy": 0.9783372972160578, + "accuracy": 0.9783317521214485, "total_bits": 62354560, "q_proj": { "group_size": { @@ -6716,7 +6716,7 @@ } }, { - "accuracy": 0.9873075932264328, + "accuracy": 0.9873018255457282, "total_bits": 75246880, "q_proj": { "group_size": { @@ -6777,7 +6777,7 @@ } }, { - "accuracy": 0.9891520766541362, + "accuracy": 0.9891604781150818, "total_bits": 76510336, "q_proj": { "group_size": { @@ -6838,7 +6838,7 @@ } }, { - "accuracy": 0.9918732857331634, + "accuracy": 0.9918712023645639, "total_bits": 85667104, "q_proj": { "group_size": { @@ -6890,7 +6890,7 @@ } }, { - "accuracy": 0.994609275367111, + "accuracy": 0.9946046634577215, "total_bits": 91722880, "q_proj": { "group_size": { @@ -6942,7 +6942,7 @@ } }, { - "accuracy": 0.9978871110361069, + "accuracy": 0.9978904649615288, "total_bits": 113978656, "q_proj": { "group_size": { @@ -6996,7 +6996,7 @@ ], "model.layers.3.mlp": [ { - "accuracy": 0.8215165734291077, + "accuracy": 0.8215235769748688, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -7048,7 +7048,7 @@ } }, { - "accuracy": 0.8267818093299866, + "accuracy": 0.8267826288938522, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -7100,7 +7100,7 @@ } }, { - "accuracy": 0.8501903116703033, + "accuracy": 0.8501260429620743, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -7149,7 +7149,7 @@ } }, { - "accuracy": 0.8572592884302139, + "accuracy": 0.8572545945644379, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -7198,7 +7198,7 @@ } }, { - "accuracy": 0.9100245088338852, + "accuracy": 0.9100974574685097, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -7250,7 +7250,7 @@ } }, { - "accuracy": 0.9172510206699371, + "accuracy": 0.9171888679265976, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -7302,7 +7302,7 @@ } }, { - "accuracy": 0.9271175712347031, + "accuracy": 0.92706498503685, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -7351,7 +7351,7 @@ } }, { - "accuracy": 0.953081201761961, + "accuracy": 0.9530698768794537, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -7394,7 +7394,7 @@ } }, { - "accuracy": 0.9570418335497379, + "accuracy": 0.957017607986927, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -7437,7 +7437,7 @@ } }, { - "accuracy": 0.9540456458926201, + "accuracy": 0.9540662579238415, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -7489,7 +7489,7 @@ } }, { - "accuracy": 0.9596328996121883, + "accuracy": 0.9596161395311356, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -7541,7 +7541,7 @@ } }, { - "accuracy": 0.9765274990350008, + "accuracy": 0.9765002690255642, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -7593,7 +7593,7 @@ } }, { - "accuracy": 0.9798263423144817, + "accuracy": 0.979815537109971, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -7645,7 +7645,7 @@ } }, { - "accuracy": 0.9871442606672645, + "accuracy": 0.987149802967906, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -7688,7 +7688,7 @@ } }, { - "accuracy": 0.9879334066063166, + "accuracy": 0.9879286577925086, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -7737,7 +7737,7 @@ } }, { - "accuracy": 0.9901348492130637, + "accuracy": 0.9901374354958534, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -7783,7 +7783,7 @@ } }, { - "accuracy": 0.9965592622756958, + "accuracy": 0.9965564699377865, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -7825,7 +7825,7 @@ ], "model.layers.4.self_attn": [ { - "accuracy": 0.8741211891174316, + "accuracy": 0.8741328567266464, "total_bits": 30308928, "q_proj": { "group_size": { @@ -7889,7 +7889,7 @@ } }, { - "accuracy": 0.8816789761185646, + "accuracy": 0.8816472738981247, "total_bits": 31455808, "q_proj": { "group_size": { @@ -7953,7 +7953,7 @@ } }, { - "accuracy": 0.9003885015845299, + "accuracy": 0.900344654917717, "total_bits": 33412832, "q_proj": { "group_size": { @@ -8017,7 +8017,7 @@ } }, { - "accuracy": 0.9307805150747299, + "accuracy": 0.9307383000850677, "total_bits": 37983200, "q_proj": { "group_size": { @@ -8081,7 +8081,7 @@ } }, { - "accuracy": 0.9374072849750519, + "accuracy": 0.9374280199408531, "total_bits": 44838176, "q_proj": { "group_size": { @@ -8145,7 +8145,7 @@ } }, { - "accuracy": 0.9384637139737606, + "accuracy": 0.9384343661367893, "total_bits": 44912768, "q_proj": { "group_size": { @@ -8209,7 +8209,7 @@ } }, { - "accuracy": 0.9653116390109062, + "accuracy": 0.9653166085481644, "total_bits": 57355552, "q_proj": { "group_size": { @@ -8261,7 +8261,7 @@ } }, { - "accuracy": 0.9661814905703068, + "accuracy": 0.9661872051656246, "total_bits": 57430144, "q_proj": { "group_size": { @@ -8313,7 +8313,7 @@ } }, { - "accuracy": 0.9684111662209034, + "accuracy": 0.968420397490263, "total_bits": 57950464, "q_proj": { "group_size": { @@ -8365,7 +8365,7 @@ } }, { - "accuracy": 0.969778697937727, + "accuracy": 0.9697968028485775, "total_bits": 58692736, "q_proj": { "group_size": { @@ -8417,7 +8417,7 @@ } }, { - "accuracy": 0.9687559828162193, + "accuracy": 0.9687640070915222, "total_bits": 59068544, "q_proj": { "group_size": { @@ -8481,7 +8481,7 @@ } }, { - "accuracy": 0.9707713481038809, + "accuracy": 0.9707700200378895, "total_bits": 59588864, "q_proj": { "group_size": { @@ -8545,7 +8545,7 @@ } }, { - "accuracy": 0.975062221288681, + "accuracy": 0.9750581458210945, "total_bits": 61536832, "q_proj": { "group_size": { @@ -8606,7 +8606,7 @@ } }, { - "accuracy": 0.9766068141907454, + "accuracy": 0.9766066540032625, "total_bits": 62354560, "q_proj": { "group_size": { @@ -8667,7 +8667,7 @@ } }, { - "accuracy": 0.986367778852582, + "accuracy": 0.9863643515855074, "total_bits": 75246880, "q_proj": { "group_size": { @@ -8728,7 +8728,7 @@ } }, { - "accuracy": 0.9882961716502905, + "accuracy": 0.9882954657077789, "total_bits": 76510336, "q_proj": { "group_size": { @@ -8789,7 +8789,7 @@ } }, { - "accuracy": 0.9909742707386613, + "accuracy": 0.9909781841561198, "total_bits": 85667104, "q_proj": { "group_size": { @@ -8841,7 +8841,7 @@ } }, { - "accuracy": 0.9939752677455544, + "accuracy": 0.9939774051308632, "total_bits": 91722880, "q_proj": { "group_size": { @@ -8893,7 +8893,7 @@ } }, { - "accuracy": 0.9976646257564425, + "accuracy": 0.9976642981637269, "total_bits": 113978656, "q_proj": { "group_size": { @@ -8947,7 +8947,7 @@ ], "model.layers.4.mlp": [ { - "accuracy": 0.8349805325269699, + "accuracy": 0.8348695486783981, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -8999,7 +8999,7 @@ } }, { - "accuracy": 0.8401748985052109, + "accuracy": 0.8400902897119522, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -9051,7 +9051,7 @@ } }, { - "accuracy": 0.8632630258798599, + "accuracy": 0.863140657544136, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -9100,7 +9100,7 @@ } }, { - "accuracy": 0.8702157586812973, + "accuracy": 0.870071679353714, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -9149,7 +9149,7 @@ } }, { - "accuracy": 0.9168255552649498, + "accuracy": 0.9167942702770233, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -9201,7 +9201,7 @@ } }, { - "accuracy": 0.9239948987960815, + "accuracy": 0.9239982962608337, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -9253,7 +9253,7 @@ } }, { - "accuracy": 0.933478482067585, + "accuracy": 0.9335077553987503, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -9302,7 +9302,7 @@ } }, { - "accuracy": 0.9563411399722099, + "accuracy": 0.9563678838312626, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -9345,7 +9345,7 @@ } }, { - "accuracy": 0.960248950868845, + "accuracy": 0.960248876363039, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -9388,7 +9388,7 @@ } }, { - "accuracy": 0.9574717655777931, + "accuracy": 0.9574565887451172, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -9440,7 +9440,7 @@ } }, { - "accuracy": 0.9629541300237179, + "accuracy": 0.9629608169198036, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -9492,7 +9492,7 @@ } }, { - "accuracy": 0.9782424960285425, + "accuracy": 0.9782421588897705, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -9544,7 +9544,7 @@ } }, { - "accuracy": 0.9814701918512583, + "accuracy": 0.9814739804714918, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -9596,7 +9596,7 @@ } }, { - "accuracy": 0.9879318736493587, + "accuracy": 0.9879294466227293, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -9639,7 +9639,7 @@ } }, { - "accuracy": 0.9888098947703838, + "accuracy": 0.988809896633029, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -9688,7 +9688,7 @@ } }, { - "accuracy": 0.9909165976569057, + "accuracy": 0.9909201338887215, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -9734,7 +9734,7 @@ } }, { - "accuracy": 0.9967805305495858, + "accuracy": 0.9967838707379997, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -9776,7 +9776,7 @@ ], "model.layers.5.self_attn": [ { - "accuracy": 0.8810675218701363, + "accuracy": 0.8811230733990669, "total_bits": 30308928, "q_proj": { "group_size": { @@ -9840,7 +9840,7 @@ } }, { - "accuracy": 0.8886952474713326, + "accuracy": 0.8887623399496078, "total_bits": 31455808, "q_proj": { "group_size": { @@ -9904,7 +9904,7 @@ } }, { - "accuracy": 0.9045190438628197, + "accuracy": 0.904540665447712, "total_bits": 33412832, "q_proj": { "group_size": { @@ -9968,7 +9968,7 @@ } }, { - "accuracy": 0.9352795407176018, + "accuracy": 0.9353184551000595, "total_bits": 37983200, "q_proj": { "group_size": { @@ -10032,7 +10032,7 @@ } }, { - "accuracy": 0.9399208948016167, + "accuracy": 0.9399426616728306, "total_bits": 44838176, "q_proj": { "group_size": { @@ -10096,7 +10096,7 @@ } }, { - "accuracy": 0.9410183131694794, + "accuracy": 0.9409984610974789, "total_bits": 44912768, "q_proj": { "group_size": { @@ -10160,7 +10160,7 @@ } }, { - "accuracy": 0.9663527235388756, + "accuracy": 0.9663707427680492, "total_bits": 57355552, "q_proj": { "group_size": { @@ -10212,7 +10212,7 @@ } }, { - "accuracy": 0.967367872595787, + "accuracy": 0.9673691280186176, "total_bits": 57430144, "q_proj": { "group_size": { @@ -10264,7 +10264,7 @@ } }, { - "accuracy": 0.9696921166032553, + "accuracy": 0.9697138108313084, "total_bits": 57950464, "q_proj": { "group_size": { @@ -10316,7 +10316,7 @@ } }, { - "accuracy": 0.9711299203336239, + "accuracy": 0.9711332526057959, "total_bits": 58692736, "q_proj": { "group_size": { @@ -10368,7 +10368,7 @@ } }, { - "accuracy": 0.9700027704238892, + "accuracy": 0.9700067639350891, "total_bits": 59068544, "q_proj": { "group_size": { @@ -10432,7 +10432,7 @@ } }, { - "accuracy": 0.9720256011933088, + "accuracy": 0.9720334056764841, "total_bits": 59588864, "q_proj": { "group_size": { @@ -10496,7 +10496,7 @@ } }, { - "accuracy": 0.9756313841789961, + "accuracy": 0.9756301864981651, "total_bits": 61536832, "q_proj": { "group_size": { @@ -10557,7 +10557,7 @@ } }, { - "accuracy": 0.9773869439959526, + "accuracy": 0.9774045348167419, "total_bits": 62354560, "q_proj": { "group_size": { @@ -10618,7 +10618,7 @@ } }, { - "accuracy": 0.9866560539230704, + "accuracy": 0.9866563268005848, "total_bits": 75246880, "q_proj": { "group_size": { @@ -10679,7 +10679,7 @@ } }, { - "accuracy": 0.9886289816349745, + "accuracy": 0.9886361388489604, "total_bits": 76510336, "q_proj": { "group_size": { @@ -10740,7 +10740,7 @@ } }, { - "accuracy": 0.9912481410428882, + "accuracy": 0.9912527557462454, "total_bits": 85667104, "q_proj": { "group_size": { @@ -10792,7 +10792,7 @@ } }, { - "accuracy": 0.9942180886864662, + "accuracy": 0.9942179205827415, "total_bits": 91722880, "q_proj": { "group_size": { @@ -10844,7 +10844,7 @@ } }, { - "accuracy": 0.9977244043257087, + "accuracy": 0.9977258909493685, "total_bits": 113978656, "q_proj": { "group_size": { @@ -10898,7 +10898,7 @@ ], "model.layers.5.mlp": [ { - "accuracy": 0.8648017942905426, + "accuracy": 0.8648174107074738, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -10950,7 +10950,7 @@ } }, { - "accuracy": 0.8694759905338287, + "accuracy": 0.8695154935121536, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -11002,7 +11002,7 @@ } }, { - "accuracy": 0.8880036026239395, + "accuracy": 0.888036236166954, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -11051,7 +11051,7 @@ } }, { - "accuracy": 0.8937847763299942, + "accuracy": 0.8938298374414444, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -11100,7 +11100,7 @@ } }, { - "accuracy": 0.9318181350827217, + "accuracy": 0.931794673204422, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -11152,7 +11152,7 @@ } }, { - "accuracy": 0.9377684108912945, + "accuracy": 0.9377873539924622, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -11204,7 +11204,7 @@ } }, { - "accuracy": 0.9455864503979683, + "accuracy": 0.9456103965640068, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -11253,7 +11253,7 @@ } }, { - "accuracy": 0.9638955146074295, + "accuracy": 0.963904220610857, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -11296,7 +11296,7 @@ } }, { - "accuracy": 0.9671880304813385, + "accuracy": 0.9671907536685467, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -11339,7 +11339,7 @@ } }, { - "accuracy": 0.9650383368134499, + "accuracy": 0.9650482423603535, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -11391,7 +11391,7 @@ } }, { - "accuracy": 0.9695848729461432, + "accuracy": 0.9695756994187832, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -11443,7 +11443,7 @@ } }, { - "accuracy": 0.9821004029363394, + "accuracy": 0.982111718505621, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -11495,7 +11495,7 @@ } }, { - "accuracy": 0.9847700102254748, + "accuracy": 0.984773620031774, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -11547,7 +11547,7 @@ } }, { - "accuracy": 0.990053579211235, + "accuracy": 0.9900560518726707, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -11590,7 +11590,7 @@ } }, { - "accuracy": 0.9907908504828811, + "accuracy": 0.990793714299798, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -11639,7 +11639,7 @@ } }, { - "accuracy": 0.992512381169945, + "accuracy": 0.992513523902744, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -11685,7 +11685,7 @@ } }, { - "accuracy": 0.9973284220322967, + "accuracy": 0.997328422497958, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -11727,7 +11727,7 @@ ], "model.layers.6.self_attn": [ { - "accuracy": 0.8890528008341789, + "accuracy": 0.8890272378921509, "total_bits": 30308928, "q_proj": { "group_size": { @@ -11791,7 +11791,7 @@ } }, { - "accuracy": 0.8953105807304382, + "accuracy": 0.8953426331281662, "total_bits": 31455808, "q_proj": { "group_size": { @@ -11855,7 +11855,7 @@ } }, { - "accuracy": 0.9070602431893349, + "accuracy": 0.9071049243211746, "total_bits": 33412832, "q_proj": { "group_size": { @@ -11919,7 +11919,7 @@ } }, { - "accuracy": 0.9330306127667427, + "accuracy": 0.9330921769142151, "total_bits": 37983200, "q_proj": { "group_size": { @@ -11983,7 +11983,7 @@ } }, { - "accuracy": 0.9448112137615681, + "accuracy": 0.9447792358696461, "total_bits": 44838176, "q_proj": { "group_size": { @@ -12047,7 +12047,7 @@ } }, { - "accuracy": 0.9454820677638054, + "accuracy": 0.9455131739377975, "total_bits": 44912768, "q_proj": { "group_size": { @@ -12111,7 +12111,7 @@ } }, { - "accuracy": 0.9695140719413757, + "accuracy": 0.969525508582592, "total_bits": 57355552, "q_proj": { "group_size": { @@ -12163,7 +12163,7 @@ } }, { - "accuracy": 0.9701519533991814, + "accuracy": 0.9701721202582121, "total_bits": 57430144, "q_proj": { "group_size": { @@ -12215,7 +12215,7 @@ } }, { - "accuracy": 0.972233023494482, + "accuracy": 0.9722142405807972, "total_bits": 57950464, "q_proj": { "group_size": { @@ -12267,7 +12267,7 @@ } }, { - "accuracy": 0.9737806580960751, + "accuracy": 0.9737892020493746, "total_bits": 58692736, "q_proj": { "group_size": { @@ -12319,7 +12319,7 @@ } }, { - "accuracy": 0.9722792375832796, + "accuracy": 0.9722646549344063, "total_bits": 59068544, "q_proj": { "group_size": { @@ -12383,7 +12383,7 @@ } }, { - "accuracy": 0.9740959145128727, + "accuracy": 0.9741003829985857, "total_bits": 59588864, "q_proj": { "group_size": { @@ -12447,7 +12447,7 @@ } }, { - "accuracy": 0.9769062623381615, + "accuracy": 0.9769276678562164, "total_bits": 61536832, "q_proj": { "group_size": { @@ -12508,7 +12508,7 @@ } }, { - "accuracy": 0.9786777105182409, + "accuracy": 0.9786724224686623, "total_bits": 62354560, "q_proj": { "group_size": { @@ -12569,7 +12569,7 @@ } }, { - "accuracy": 0.987405676394701, + "accuracy": 0.9874210571870208, "total_bits": 75246880, "q_proj": { "group_size": { @@ -12630,7 +12630,7 @@ } }, { - "accuracy": 0.9893234008923173, + "accuracy": 0.9893340524286032, "total_bits": 76510336, "q_proj": { "group_size": { @@ -12691,7 +12691,7 @@ } }, { - "accuracy": 0.9920955216512084, + "accuracy": 0.9920956883579493, "total_bits": 85667104, "q_proj": { "group_size": { @@ -12743,7 +12743,7 @@ } }, { - "accuracy": 0.9945340054109693, + "accuracy": 0.9945395006798208, "total_bits": 91722880, "q_proj": { "group_size": { @@ -12795,7 +12795,7 @@ } }, { - "accuracy": 0.9979429002851248, + "accuracy": 0.9979436090216041, "total_bits": 113978656, "q_proj": { "group_size": { @@ -12849,7 +12849,7 @@ ], "model.layers.6.mlp": [ { - "accuracy": 0.8601508587598801, + "accuracy": 0.860052615404129, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -12901,7 +12901,7 @@ } }, { - "accuracy": 0.8646412193775177, + "accuracy": 0.864541083574295, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -12953,7 +12953,7 @@ } }, { - "accuracy": 0.883434846997261, + "accuracy": 0.8833191320300102, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -13002,7 +13002,7 @@ } }, { - "accuracy": 0.8892851322889328, + "accuracy": 0.8891885280609131, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -13051,7 +13051,7 @@ } }, { - "accuracy": 0.9288300052285194, + "accuracy": 0.928782157599926, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -13103,7 +13103,7 @@ } }, { - "accuracy": 0.9350205287337303, + "accuracy": 0.9350093528628349, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -13155,7 +13155,7 @@ } }, { - "accuracy": 0.942976824939251, + "accuracy": 0.9429488480091095, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -13204,7 +13204,7 @@ } }, { - "accuracy": 0.9626522436738014, + "accuracy": 0.9626510068774223, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -13247,7 +13247,7 @@ } }, { - "accuracy": 0.9660748802125454, + "accuracy": 0.9660806246101856, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -13290,7 +13290,7 @@ } }, { - "accuracy": 0.9634818024933338, + "accuracy": 0.9634533040225506, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -13342,7 +13342,7 @@ } }, { - "accuracy": 0.9682779163122177, + "accuracy": 0.9682538993656635, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -13394,7 +13394,7 @@ } }, { - "accuracy": 0.9812953136861324, + "accuracy": 0.9812956769019365, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -13446,7 +13446,7 @@ } }, { - "accuracy": 0.9841256625950336, + "accuracy": 0.9841305427253246, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -13498,7 +13498,7 @@ } }, { - "accuracy": 0.9896932039409876, + "accuracy": 0.9896902348846197, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -13541,7 +13541,7 @@ } }, { - "accuracy": 0.9903836958110332, + "accuracy": 0.9903781078755856, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -13590,7 +13590,7 @@ } }, { - "accuracy": 0.9921105708926916, + "accuracy": 0.9921116251498461, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -13636,7 +13636,7 @@ } }, { - "accuracy": 0.9972038897685707, + "accuracy": 0.9972023773007095, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -13678,7 +13678,7 @@ ], "model.layers.7.self_attn": [ { - "accuracy": 0.8790968656539917, + "accuracy": 0.879193864762783, "total_bits": 30308928, "q_proj": { "group_size": { @@ -13742,7 +13742,7 @@ } }, { - "accuracy": 0.8855356946587563, + "accuracy": 0.885476142168045, "total_bits": 31455808, "q_proj": { "group_size": { @@ -13806,7 +13806,7 @@ } }, { - "accuracy": 0.9016124606132507, + "accuracy": 0.9015669599175453, "total_bits": 33412832, "q_proj": { "group_size": { @@ -13870,7 +13870,7 @@ } }, { - "accuracy": 0.9258091002702713, + "accuracy": 0.9257946684956551, "total_bits": 37983200, "q_proj": { "group_size": { @@ -13934,7 +13934,7 @@ } }, { - "accuracy": 0.9374926388263702, + "accuracy": 0.9375413469970226, "total_bits": 44838176, "q_proj": { "group_size": { @@ -13998,7 +13998,7 @@ } }, { - "accuracy": 0.9384367614984512, + "accuracy": 0.9384141005575657, "total_bits": 44912768, "q_proj": { "group_size": { @@ -14062,7 +14062,7 @@ } }, { - "accuracy": 0.9647591635584831, + "accuracy": 0.9647888205945492, "total_bits": 57355552, "q_proj": { "group_size": { @@ -14114,7 +14114,7 @@ } }, { - "accuracy": 0.965646505355835, + "accuracy": 0.9656654335558414, "total_bits": 57430144, "q_proj": { "group_size": { @@ -14166,7 +14166,7 @@ } }, { - "accuracy": 0.9682505577802658, + "accuracy": 0.968209270387888, "total_bits": 57950464, "q_proj": { "group_size": { @@ -14218,7 +14218,7 @@ } }, { - "accuracy": 0.9700952749699354, + "accuracy": 0.9700861666351557, "total_bits": 58692736, "q_proj": { "group_size": { @@ -14270,7 +14270,7 @@ } }, { - "accuracy": 0.9685632511973381, + "accuracy": 0.9685384854674339, "total_bits": 59068544, "q_proj": { "group_size": { @@ -14334,7 +14334,7 @@ } }, { - "accuracy": 0.9708435665816069, + "accuracy": 0.9708608984947205, "total_bits": 59588864, "q_proj": { "group_size": { @@ -14398,7 +14398,7 @@ } }, { - "accuracy": 0.9744916334748268, + "accuracy": 0.9745250958949327, "total_bits": 61536832, "q_proj": { "group_size": { @@ -14459,7 +14459,7 @@ } }, { - "accuracy": 0.976450975984335, + "accuracy": 0.9764064699411392, "total_bits": 62354560, "q_proj": { "group_size": { @@ -14520,7 +14520,7 @@ } }, { - "accuracy": 0.9858461897820234, + "accuracy": 0.9858506005257368, "total_bits": 75246880, "q_proj": { "group_size": { @@ -14581,7 +14581,7 @@ } }, { - "accuracy": 0.9881619503721595, + "accuracy": 0.9881491707637906, "total_bits": 76510336, "q_proj": { "group_size": { @@ -14642,7 +14642,7 @@ } }, { - "accuracy": 0.9907449893653393, + "accuracy": 0.9907346172258258, "total_bits": 85667104, "q_proj": { "group_size": { @@ -14694,7 +14694,7 @@ } }, { - "accuracy": 0.9937627115286887, + "accuracy": 0.9937750529497862, "total_bits": 91722880, "q_proj": { "group_size": { @@ -14746,7 +14746,7 @@ } }, { - "accuracy": 0.9975894596427679, + "accuracy": 0.997590501094237, "total_bits": 113978656, "q_proj": { "group_size": { @@ -14800,7 +14800,7 @@ ], "model.layers.7.mlp": [ { - "accuracy": 0.8853595852851868, + "accuracy": 0.8853648155927658, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -14852,7 +14852,7 @@ } }, { - "accuracy": 0.8890897259116173, + "accuracy": 0.8890001177787781, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -14904,7 +14904,7 @@ } }, { - "accuracy": 0.9030437618494034, + "accuracy": 0.9031427130103111, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -14953,7 +14953,7 @@ } }, { - "accuracy": 0.9074160978198051, + "accuracy": 0.9074975475668907, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -15002,7 +15002,7 @@ } }, { - "accuracy": 0.941648468375206, + "accuracy": 0.9416834153234959, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -15054,7 +15054,7 @@ } }, { - "accuracy": 0.9470459222793579, + "accuracy": 0.9470086395740509, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -15106,7 +15106,7 @@ } }, { - "accuracy": 0.9530564919114113, + "accuracy": 0.9530163891613483, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -15155,7 +15155,7 @@ } }, { - "accuracy": 0.9691261779516935, + "accuracy": 0.9691323079168797, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -15198,7 +15198,7 @@ } }, { - "accuracy": 0.9720455892384052, + "accuracy": 0.9720457717776299, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -15241,7 +15241,7 @@ } }, { - "accuracy": 0.9700704663991928, + "accuracy": 0.9700757917016745, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -15293,7 +15293,7 @@ } }, { - "accuracy": 0.9741590898483992, + "accuracy": 0.9741606414318085, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -15345,7 +15345,7 @@ } }, { - "accuracy": 0.9846790870651603, + "accuracy": 0.9846808835864067, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -15397,7 +15397,7 @@ } }, { - "accuracy": 0.9870709776878357, + "accuracy": 0.9870640141889453, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -15449,7 +15449,7 @@ } }, { - "accuracy": 0.9914329303428531, + "accuracy": 0.9914321266114712, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -15492,7 +15492,7 @@ } }, { - "accuracy": 0.9921443285420537, + "accuracy": 0.9921473953872919, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -15541,7 +15541,7 @@ } }, { - "accuracy": 0.9934511734172702, + "accuracy": 0.9934548917226493, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -15587,7 +15587,7 @@ } }, { - "accuracy": 0.997653994243592, + "accuracy": 0.9976542603690177, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -15629,7 +15629,7 @@ ], "model.layers.8.self_attn": [ { - "accuracy": 0.9262347742915154, + "accuracy": 0.9262180775403976, "total_bits": 30308928, "q_proj": { "group_size": { @@ -15693,7 +15693,7 @@ } }, { - "accuracy": 0.9292053729295731, + "accuracy": 0.9292021617293358, "total_bits": 31455808, "q_proj": { "group_size": { @@ -15757,7 +15757,7 @@ } }, { - "accuracy": 0.9359699636697769, + "accuracy": 0.9360192567110062, "total_bits": 33412832, "q_proj": { "group_size": { @@ -15821,7 +15821,7 @@ } }, { - "accuracy": 0.9522256478667259, + "accuracy": 0.9522725865244865, "total_bits": 37983200, "q_proj": { "group_size": { @@ -15885,7 +15885,7 @@ } }, { - "accuracy": 0.9621881134808064, + "accuracy": 0.9622119329869747, "total_bits": 44838176, "q_proj": { "group_size": { @@ -15949,7 +15949,7 @@ } }, { - "accuracy": 0.9625761918723583, + "accuracy": 0.9625555910170078, "total_bits": 44912768, "q_proj": { "group_size": { @@ -16013,7 +16013,7 @@ } }, { - "accuracy": 0.9790825769305229, + "accuracy": 0.9790912084281445, "total_bits": 57355552, "q_proj": { "group_size": { @@ -16065,7 +16065,7 @@ } }, { - "accuracy": 0.9794511869549751, + "accuracy": 0.9794544521719217, "total_bits": 57430144, "q_proj": { "group_size": { @@ -16117,7 +16117,7 @@ } }, { - "accuracy": 0.9807918332517147, + "accuracy": 0.9808075986802578, "total_bits": 57950464, "q_proj": { "group_size": { @@ -16169,7 +16169,7 @@ } }, { - "accuracy": 0.9819746408611536, + "accuracy": 0.9819814618676901, "total_bits": 58692736, "q_proj": { "group_size": { @@ -16221,7 +16221,7 @@ } }, { - "accuracy": 0.9808401670306921, + "accuracy": 0.980836171656847, "total_bits": 59068544, "q_proj": { "group_size": { @@ -16285,7 +16285,7 @@ } }, { - "accuracy": 0.9820886384695768, + "accuracy": 0.9820813406258821, "total_bits": 59588864, "q_proj": { "group_size": { @@ -16349,7 +16349,7 @@ } }, { - "accuracy": 0.9836654253304005, + "accuracy": 0.9836451895534992, "total_bits": 61536832, "q_proj": { "group_size": { @@ -16410,7 +16410,7 @@ } }, { - "accuracy": 0.984879620373249, + "accuracy": 0.9848749991506338, "total_bits": 62354560, "q_proj": { "group_size": { @@ -16471,7 +16471,7 @@ } }, { - "accuracy": 0.9910424621775746, + "accuracy": 0.9910451974719763, "total_bits": 75246880, "q_proj": { "group_size": { @@ -16532,7 +16532,7 @@ } }, { - "accuracy": 0.9924080655910075, + "accuracy": 0.9923991961404681, "total_bits": 76510336, "q_proj": { "group_size": { @@ -16593,7 +16593,7 @@ } }, { - "accuracy": 0.9945205664262176, + "accuracy": 0.9945177044719458, "total_bits": 85667104, "q_proj": { "group_size": { @@ -16645,7 +16645,7 @@ } }, { - "accuracy": 0.996047873981297, + "accuracy": 0.9960471182130277, "total_bits": 91722880, "q_proj": { "group_size": { @@ -16697,7 +16697,7 @@ } }, { - "accuracy": 0.998559134779498, + "accuracy": 0.9985587185947224, "total_bits": 113978656, "q_proj": { "group_size": { @@ -16751,7 +16751,7 @@ ], "model.layers.8.mlp": [ { - "accuracy": 0.8776565492153168, + "accuracy": 0.8774635121226311, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -16803,7 +16803,7 @@ } }, { - "accuracy": 0.8816645741462708, + "accuracy": 0.8816986083984375, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -16855,7 +16855,7 @@ } }, { - "accuracy": 0.8966159075498581, + "accuracy": 0.8964935094118118, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -16904,7 +16904,7 @@ } }, { - "accuracy": 0.9011181369423866, + "accuracy": 0.9010185077786446, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -16953,7 +16953,7 @@ } }, { - "accuracy": 0.9383117109537125, + "accuracy": 0.9382704459130764, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -17005,7 +17005,7 @@ } }, { - "accuracy": 0.9435076154768467, + "accuracy": 0.943505771458149, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -17057,7 +17057,7 @@ } }, { - "accuracy": 0.94978042319417, + "accuracy": 0.9497714042663574, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -17106,7 +17106,7 @@ } }, { - "accuracy": 0.9677746780216694, + "accuracy": 0.9677646122872829, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -17149,7 +17149,7 @@ } }, { - "accuracy": 0.9705556537955999, + "accuracy": 0.9705599583685398, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -17192,7 +17192,7 @@ } }, { - "accuracy": 0.9684275537729263, + "accuracy": 0.9684217162430286, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -17244,7 +17244,7 @@ } }, { - "accuracy": 0.972480921074748, + "accuracy": 0.9724891372025013, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -17296,7 +17296,7 @@ } }, { - "accuracy": 0.9838621038943529, + "accuracy": 0.9838705994188786, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -17348,7 +17348,7 @@ } }, { - "accuracy": 0.9862669911235571, + "accuracy": 0.9862625077366829, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -17400,7 +17400,7 @@ } }, { - "accuracy": 0.9911140948534012, + "accuracy": 0.9911155067384243, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -17443,7 +17443,7 @@ } }, { - "accuracy": 0.9916985612362623, + "accuracy": 0.9917009193450212, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -17492,7 +17492,7 @@ } }, { - "accuracy": 0.9930045073851943, + "accuracy": 0.9930097297765315, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -17538,7 +17538,7 @@ } }, { - "accuracy": 0.9975975535344332, + "accuracy": 0.9975970382802188, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -17580,7 +17580,7 @@ ], "model.layers.9.self_attn": [ { - "accuracy": 0.9156324192881584, + "accuracy": 0.9154471009969711, "total_bits": 30308928, "q_proj": { "group_size": { @@ -17644,7 +17644,7 @@ } }, { - "accuracy": 0.918098546564579, + "accuracy": 0.9180809780955315, "total_bits": 31455808, "q_proj": { "group_size": { @@ -17708,7 +17708,7 @@ } }, { - "accuracy": 0.9291017279028893, + "accuracy": 0.9292140081524849, "total_bits": 33412832, "q_proj": { "group_size": { @@ -17772,7 +17772,7 @@ } }, { - "accuracy": 0.9474808238446712, + "accuracy": 0.9475394748151302, "total_bits": 37983200, "q_proj": { "group_size": { @@ -17836,7 +17836,7 @@ } }, { - "accuracy": 0.9566572569310665, + "accuracy": 0.9566517025232315, "total_bits": 44838176, "q_proj": { "group_size": { @@ -17900,7 +17900,7 @@ } }, { - "accuracy": 0.9572707749903202, + "accuracy": 0.9572483897209167, "total_bits": 44912768, "q_proj": { "group_size": { @@ -17964,7 +17964,7 @@ } }, { - "accuracy": 0.9765746779739857, + "accuracy": 0.9765565041452646, "total_bits": 57355552, "q_proj": { "group_size": { @@ -18016,7 +18016,7 @@ } }, { - "accuracy": 0.9771516304463148, + "accuracy": 0.9771292954683304, "total_bits": 57430144, "q_proj": { "group_size": { @@ -18068,7 +18068,7 @@ } }, { - "accuracy": 0.9786375071853399, + "accuracy": 0.9786593876779079, "total_bits": 57950464, "q_proj": { "group_size": { @@ -18120,7 +18120,7 @@ } }, { - "accuracy": 0.9796764496713877, + "accuracy": 0.9796735905110836, "total_bits": 58692736, "q_proj": { "group_size": { @@ -18172,7 +18172,7 @@ } }, { - "accuracy": 0.9782940931618214, + "accuracy": 0.9782825838774443, "total_bits": 59068544, "q_proj": { "group_size": { @@ -18236,7 +18236,7 @@ } }, { - "accuracy": 0.9796239528805017, + "accuracy": 0.9796628952026367, "total_bits": 59588864, "q_proj": { "group_size": { @@ -18300,7 +18300,7 @@ } }, { - "accuracy": 0.9821039438247681, + "accuracy": 0.9821524657309055, "total_bits": 61536832, "q_proj": { "group_size": { @@ -18361,7 +18361,7 @@ } }, { - "accuracy": 0.9833755418658257, + "accuracy": 0.9833454601466656, "total_bits": 62354560, "q_proj": { "group_size": { @@ -18422,7 +18422,7 @@ } }, { - "accuracy": 0.9902575109153986, + "accuracy": 0.9902483588084579, "total_bits": 75246880, "q_proj": { "group_size": { @@ -18483,7 +18483,7 @@ } }, { - "accuracy": 0.9916903469711542, + "accuracy": 0.9916817611083388, "total_bits": 76510336, "q_proj": { "group_size": { @@ -18544,7 +18544,7 @@ } }, { - "accuracy": 0.993913528509438, + "accuracy": 0.9939101827330887, "total_bits": 85667104, "q_proj": { "group_size": { @@ -18596,7 +18596,7 @@ } }, { - "accuracy": 0.995829266961664, + "accuracy": 0.99583475664258, "total_bits": 91722880, "q_proj": { "group_size": { @@ -18648,7 +18648,7 @@ } }, { - "accuracy": 0.9984044209122658, + "accuracy": 0.9984046985628083, "total_bits": 113978656, "q_proj": { "group_size": { @@ -18702,7 +18702,7 @@ ], "model.layers.9.mlp": [ { - "accuracy": 0.8979872688651085, + "accuracy": 0.8980022817850113, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -18754,7 +18754,7 @@ } }, { - "accuracy": 0.9010738134384155, + "accuracy": 0.9009037613868713, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -18806,7 +18806,7 @@ } }, { - "accuracy": 0.9138390645384789, + "accuracy": 0.9135458469390869, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -18855,7 +18855,7 @@ } }, { - "accuracy": 0.9180445671081543, + "accuracy": 0.9177292063832283, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -18904,7 +18904,7 @@ } }, { - "accuracy": 0.9479075893759727, + "accuracy": 0.9478593431413174, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -18956,7 +18956,7 @@ } }, { - "accuracy": 0.9523597359657288, + "accuracy": 0.9523407816886902, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -19008,7 +19008,7 @@ } }, { - "accuracy": 0.9581680968403816, + "accuracy": 0.958098616451025, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -19057,7 +19057,7 @@ } }, { - "accuracy": 0.9725048933178186, + "accuracy": 0.9725117534399033, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -19100,7 +19100,7 @@ } }, { - "accuracy": 0.9750282820314169, + "accuracy": 0.975043885409832, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -19143,7 +19143,7 @@ } }, { - "accuracy": 0.9732320122420788, + "accuracy": 0.9732519965618849, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -19195,7 +19195,7 @@ } }, { - "accuracy": 0.9767951015383005, + "accuracy": 0.9767921902239323, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -19247,7 +19247,7 @@ } }, { - "accuracy": 0.9863404175266623, + "accuracy": 0.9863471165299416, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -19299,7 +19299,7 @@ } }, { - "accuracy": 0.9883861737325788, + "accuracy": 0.9883871376514435, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -19351,7 +19351,7 @@ } }, { - "accuracy": 0.992430523969233, + "accuracy": 0.9924324788153172, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -19394,7 +19394,7 @@ } }, { - "accuracy": 0.9929885254241526, + "accuracy": 0.9929874055087566, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -19443,7 +19443,7 @@ } }, { - "accuracy": 0.9942358685657382, + "accuracy": 0.9942356436513364, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -19489,7 +19489,7 @@ } }, { - "accuracy": 0.9979169676080346, + "accuracy": 0.9979170782025903, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -19531,7 +19531,7 @@ ], "model.layers.10.self_attn": [ { - "accuracy": 0.9255667924880981, + "accuracy": 0.9254285618662834, "total_bits": 30308928, "q_proj": { "group_size": { @@ -19595,7 +19595,7 @@ } }, { - "accuracy": 0.9285323470830917, + "accuracy": 0.9285455048084259, "total_bits": 31455808, "q_proj": { "group_size": { @@ -19659,7 +19659,7 @@ } }, { - "accuracy": 0.9377534314990044, + "accuracy": 0.9378277622163296, "total_bits": 33412832, "q_proj": { "group_size": { @@ -19723,7 +19723,7 @@ } }, { - "accuracy": 0.9557530134916306, + "accuracy": 0.9558689743280411, "total_bits": 37983200, "q_proj": { "group_size": { @@ -19787,7 +19787,7 @@ } }, { - "accuracy": 0.9622449390590191, + "accuracy": 0.962271124124527, "total_bits": 44838176, "q_proj": { "group_size": { @@ -19851,7 +19851,7 @@ } }, { - "accuracy": 0.9627878665924072, + "accuracy": 0.9627652131021023, "total_bits": 44912768, "q_proj": { "group_size": { @@ -19915,7 +19915,7 @@ } }, { - "accuracy": 0.9794471673667431, + "accuracy": 0.9794409442692995, "total_bits": 57355552, "q_proj": { "group_size": { @@ -19967,7 +19967,7 @@ } }, { - "accuracy": 0.9799556694924831, + "accuracy": 0.9799547977745533, "total_bits": 57430144, "q_proj": { "group_size": { @@ -20019,7 +20019,7 @@ } }, { - "accuracy": 0.9812588579952717, + "accuracy": 0.9812564067542553, "total_bits": 57950464, "q_proj": { "group_size": { @@ -20071,7 +20071,7 @@ } }, { - "accuracy": 0.982187744230032, + "accuracy": 0.98219870403409, "total_bits": 58692736, "q_proj": { "group_size": { @@ -20123,7 +20123,7 @@ } }, { - "accuracy": 0.9810379017144442, + "accuracy": 0.9810190796852112, "total_bits": 59068544, "q_proj": { "group_size": { @@ -20187,7 +20187,7 @@ } }, { - "accuracy": 0.9822507984936237, + "accuracy": 0.9822470918297768, "total_bits": 59588864, "q_proj": { "group_size": { @@ -20251,7 +20251,7 @@ } }, { - "accuracy": 0.9842813797295094, + "accuracy": 0.9842821806669235, "total_bits": 61536832, "q_proj": { "group_size": { @@ -20312,7 +20312,7 @@ } }, { - "accuracy": 0.9854367896914482, + "accuracy": 0.9854378383606672, "total_bits": 62354560, "q_proj": { "group_size": { @@ -20373,7 +20373,7 @@ } }, { - "accuracy": 0.991425178013742, + "accuracy": 0.9914244255051017, "total_bits": 75246880, "q_proj": { "group_size": { @@ -20434,7 +20434,7 @@ } }, { - "accuracy": 0.9926807903684676, + "accuracy": 0.9926830539479852, "total_bits": 76510336, "q_proj": { "group_size": { @@ -20495,7 +20495,7 @@ } }, { - "accuracy": 0.9946406041271985, + "accuracy": 0.994636666495353, "total_bits": 85667104, "q_proj": { "group_size": { @@ -20547,7 +20547,7 @@ } }, { - "accuracy": 0.9963198550976813, + "accuracy": 0.9963192888535559, "total_bits": 91722880, "q_proj": { "group_size": { @@ -20599,7 +20599,7 @@ } }, { - "accuracy": 0.9985976866446435, + "accuracy": 0.9985979022458196, "total_bits": 113978656, "q_proj": { "group_size": { @@ -20653,7 +20653,7 @@ ], "model.layers.10.mlp": [ { - "accuracy": 0.9072035849094391, + "accuracy": 0.9071538224816322, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -20705,7 +20705,7 @@ } }, { - "accuracy": 0.9100625887513161, + "accuracy": 0.9100388288497925, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -20757,7 +20757,7 @@ } }, { - "accuracy": 0.9221524447202682, + "accuracy": 0.922150731086731, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -20806,7 +20806,7 @@ } }, { - "accuracy": 0.9259821102023125, + "accuracy": 0.9259923249483109, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -20855,7 +20855,7 @@ } }, { - "accuracy": 0.9525629505515099, + "accuracy": 0.9525576233863831, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -20907,7 +20907,7 @@ } }, { - "accuracy": 0.9565679579973221, + "accuracy": 0.9565683789551258, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -20959,7 +20959,7 @@ } }, { - "accuracy": 0.9619633853435516, + "accuracy": 0.9619757160544395, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -21008,7 +21008,7 @@ } }, { - "accuracy": 0.975079670548439, + "accuracy": 0.9750785324722528, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -21051,7 +21051,7 @@ } }, { - "accuracy": 0.9773116856813431, + "accuracy": 0.9773098323494196, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -21094,7 +21094,7 @@ } }, { - "accuracy": 0.9756849519908428, + "accuracy": 0.9756814874708652, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -21146,7 +21146,7 @@ } }, { - "accuracy": 0.9788219351321459, + "accuracy": 0.9788009151816368, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -21198,7 +21198,7 @@ } }, { - "accuracy": 0.9875948084518313, + "accuracy": 0.9875935269519687, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -21250,7 +21250,7 @@ } }, { - "accuracy": 0.9894048757851124, + "accuracy": 0.9894073354080319, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -21302,7 +21302,7 @@ } }, { - "accuracy": 0.993154349271208, + "accuracy": 0.9931564317084849, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -21345,7 +21345,7 @@ } }, { - "accuracy": 0.9936201777309179, + "accuracy": 0.9936129190027714, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -21394,7 +21394,7 @@ } }, { - "accuracy": 0.9947737217880785, + "accuracy": 0.9947650441899896, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -21440,7 +21440,7 @@ } }, { - "accuracy": 0.9981311410665512, + "accuracy": 0.9981314450269565, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -21482,7 +21482,7 @@ ], "model.layers.11.self_attn": [ { - "accuracy": 0.9352168366312981, + "accuracy": 0.9351540505886078, "total_bits": 30308928, "q_proj": { "group_size": { @@ -21546,7 +21546,7 @@ } }, { - "accuracy": 0.9374474063515663, + "accuracy": 0.9374874904751778, "total_bits": 31455808, "q_proj": { "group_size": { @@ -21610,7 +21610,7 @@ } }, { - "accuracy": 0.9444124512374401, + "accuracy": 0.9444890134036541, "total_bits": 33412832, "q_proj": { "group_size": { @@ -21674,7 +21674,7 @@ } }, { - "accuracy": 0.9625572040677071, + "accuracy": 0.9625845514237881, "total_bits": 37983200, "q_proj": { "group_size": { @@ -21738,7 +21738,7 @@ } }, { - "accuracy": 0.9669128507375717, + "accuracy": 0.9669028967618942, "total_bits": 44838176, "q_proj": { "group_size": { @@ -21802,7 +21802,7 @@ } }, { - "accuracy": 0.967328879982233, + "accuracy": 0.9672983437776566, "total_bits": 44912768, "q_proj": { "group_size": { @@ -21866,7 +21866,7 @@ } }, { - "accuracy": 0.9819058496505022, + "accuracy": 0.9818990211933851, "total_bits": 57355552, "q_proj": { "group_size": { @@ -21918,7 +21918,7 @@ } }, { - "accuracy": 0.9823221303522587, + "accuracy": 0.9823174811899662, "total_bits": 57430144, "q_proj": { "group_size": { @@ -21970,7 +21970,7 @@ } }, { - "accuracy": 0.9834885075688362, + "accuracy": 0.9834803491830826, "total_bits": 57950464, "q_proj": { "group_size": { @@ -22022,7 +22022,7 @@ } }, { - "accuracy": 0.9843579828739166, + "accuracy": 0.9843547828495502, "total_bits": 58692736, "q_proj": { "group_size": { @@ -22074,7 +22074,7 @@ } }, { - "accuracy": 0.9833086878061295, + "accuracy": 0.9833086282014847, "total_bits": 59068544, "q_proj": { "group_size": { @@ -22138,7 +22138,7 @@ } }, { - "accuracy": 0.9843683261424303, + "accuracy": 0.9843555800616741, "total_bits": 59588864, "q_proj": { "group_size": { @@ -22202,7 +22202,7 @@ } }, { - "accuracy": 0.9858474927023053, + "accuracy": 0.9858486671000719, "total_bits": 61536832, "q_proj": { "group_size": { @@ -22263,7 +22263,7 @@ } }, { - "accuracy": 0.9868558822199702, + "accuracy": 0.9868420660495758, "total_bits": 62354560, "q_proj": { "group_size": { @@ -22324,7 +22324,7 @@ } }, { - "accuracy": 0.9922798662446439, + "accuracy": 0.9922812515869737, "total_bits": 75246880, "q_proj": { "group_size": { @@ -22385,7 +22385,7 @@ } }, { - "accuracy": 0.9933844502083957, + "accuracy": 0.9933867929503322, "total_bits": 76510336, "q_proj": { "group_size": { @@ -22446,7 +22446,7 @@ } }, { - "accuracy": 0.9952675346285105, + "accuracy": 0.9952673111110926, "total_bits": 85667104, "q_proj": { "group_size": { @@ -22498,7 +22498,7 @@ } }, { - "accuracy": 0.9966305803973228, + "accuracy": 0.9966264350805432, "total_bits": 91722880, "q_proj": { "group_size": { @@ -22550,7 +22550,7 @@ } }, { - "accuracy": 0.9987549100769684, + "accuracy": 0.9987560133449733, "total_bits": 113978656, "q_proj": { "group_size": { @@ -22604,7 +22604,7 @@ ], "model.layers.11.mlp": [ { - "accuracy": 0.922992967069149, + "accuracy": 0.9229258298873901, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -22656,7 +22656,7 @@ } }, { - "accuracy": 0.9253613352775574, + "accuracy": 0.9253562912344933, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -22708,7 +22708,7 @@ } }, { - "accuracy": 0.9349236041307449, + "accuracy": 0.9348956197500229, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -22757,7 +22757,7 @@ } }, { - "accuracy": 0.9380608797073364, + "accuracy": 0.9380255676805973, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -22806,7 +22806,7 @@ } }, { - "accuracy": 0.9607282392680645, + "accuracy": 0.9607758708298206, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -22858,7 +22858,7 @@ } }, { - "accuracy": 0.9641470424830914, + "accuracy": 0.964177817106247, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -22910,7 +22910,7 @@ } }, { - "accuracy": 0.9683838337659836, + "accuracy": 0.9684282056987286, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -22959,7 +22959,7 @@ } }, { - "accuracy": 0.9791794028133154, + "accuracy": 0.9791763704270124, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -23002,7 +23002,7 @@ } }, { - "accuracy": 0.9810750614851713, + "accuracy": 0.9810761008411646, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -23045,7 +23045,7 @@ } }, { - "accuracy": 0.9798438455909491, + "accuracy": 0.9798630569130182, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -23097,7 +23097,7 @@ } }, { - "accuracy": 0.9825253784656525, + "accuracy": 0.9825179129838943, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -23149,7 +23149,7 @@ } }, { - "accuracy": 0.9897021958604455, + "accuracy": 0.9896943140774965, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -23201,7 +23201,7 @@ } }, { - "accuracy": 0.9912520227953792, + "accuracy": 0.9912572083994746, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -23253,7 +23253,7 @@ } }, { - "accuracy": 0.994236150290817, + "accuracy": 0.9942377745173872, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -23296,7 +23296,7 @@ } }, { - "accuracy": 0.9947156864218414, + "accuracy": 0.9947184869088233, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -23345,7 +23345,7 @@ } }, { - "accuracy": 0.9956142762675881, + "accuracy": 0.9956165258772671, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -23391,7 +23391,7 @@ } }, { - "accuracy": 0.9984074216336012, + "accuracy": 0.9984078536508605, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -23433,7 +23433,7 @@ ], "model.layers.12.self_attn": [ { - "accuracy": 0.918254628777504, + "accuracy": 0.9183487147092819, "total_bits": 30308928, "q_proj": { "group_size": { @@ -23497,7 +23497,7 @@ } }, { - "accuracy": 0.9215041920542717, + "accuracy": 0.9214219599962234, "total_bits": 31455808, "q_proj": { "group_size": { @@ -23561,7 +23561,7 @@ } }, { - "accuracy": 0.9291860163211823, + "accuracy": 0.9291634485125542, "total_bits": 33412832, "q_proj": { "group_size": { @@ -23625,7 +23625,7 @@ } }, { - "accuracy": 0.9523625448346138, + "accuracy": 0.9523275718092918, "total_bits": 37983200, "q_proj": { "group_size": { @@ -23689,7 +23689,7 @@ } }, { - "accuracy": 0.9582577794790268, + "accuracy": 0.958238810300827, "total_bits": 44838176, "q_proj": { "group_size": { @@ -23753,7 +23753,7 @@ } }, { - "accuracy": 0.9587131217122078, + "accuracy": 0.9586759731173515, "total_bits": 44912768, "q_proj": { "group_size": { @@ -23817,7 +23817,7 @@ } }, { - "accuracy": 0.9767006933689117, + "accuracy": 0.9766934681683779, "total_bits": 57355552, "q_proj": { "group_size": { @@ -23869,7 +23869,7 @@ } }, { - "accuracy": 0.9771417211741209, + "accuracy": 0.9771546181291342, "total_bits": 57430144, "q_proj": { "group_size": { @@ -23921,7 +23921,7 @@ } }, { - "accuracy": 0.9786731544882059, + "accuracy": 0.978665629401803, "total_bits": 57950464, "q_proj": { "group_size": { @@ -23973,7 +23973,7 @@ } }, { - "accuracy": 0.9797742627561092, + "accuracy": 0.9797797929495573, "total_bits": 58692736, "q_proj": { "group_size": { @@ -24025,7 +24025,7 @@ } }, { - "accuracy": 0.9788827616721392, + "accuracy": 0.9788805264979601, "total_bits": 59068544, "q_proj": { "group_size": { @@ -24089,7 +24089,7 @@ } }, { - "accuracy": 0.980213237926364, + "accuracy": 0.9802060816437006, "total_bits": 59588864, "q_proj": { "group_size": { @@ -24153,7 +24153,7 @@ } }, { - "accuracy": 0.9817759990692139, + "accuracy": 0.9817687887698412, "total_bits": 61536832, "q_proj": { "group_size": { @@ -24214,7 +24214,7 @@ } }, { - "accuracy": 0.9829946663230658, + "accuracy": 0.9829999972134829, "total_bits": 62354560, "q_proj": { "group_size": { @@ -24275,7 +24275,7 @@ } }, { - "accuracy": 0.9900454664602876, + "accuracy": 0.9900399595499039, "total_bits": 75246880, "q_proj": { "group_size": { @@ -24336,7 +24336,7 @@ } }, { - "accuracy": 0.9914399096742272, + "accuracy": 0.9914370570331812, "total_bits": 76510336, "q_proj": { "group_size": { @@ -24397,7 +24397,7 @@ } }, { - "accuracy": 0.9938779231160879, + "accuracy": 0.9938817266374826, "total_bits": 85667104, "q_proj": { "group_size": { @@ -24449,7 +24449,7 @@ } }, { - "accuracy": 0.9955275356769562, + "accuracy": 0.9955261144787073, "total_bits": 91722880, "q_proj": { "group_size": { @@ -24501,7 +24501,7 @@ } }, { - "accuracy": 0.998387377592735, + "accuracy": 0.998388102161698, "total_bits": 113978656, "q_proj": { "group_size": { @@ -24555,7 +24555,7 @@ ], "model.layers.12.mlp": [ { - "accuracy": 0.9067049399018288, + "accuracy": 0.9067919254302979, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -24607,7 +24607,7 @@ } }, { - "accuracy": 0.9094245880842209, + "accuracy": 0.9094521254301071, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -24659,7 +24659,7 @@ } }, { - "accuracy": 0.9205821231007576, + "accuracy": 0.9205865487456322, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -24708,7 +24708,7 @@ } }, { - "accuracy": 0.924173966050148, + "accuracy": 0.9241846203804016, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -24757,7 +24757,7 @@ } }, { - "accuracy": 0.9520841389894485, + "accuracy": 0.9520521014928818, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -24809,7 +24809,7 @@ } }, { - "accuracy": 0.9562458842992783, + "accuracy": 0.9562351442873478, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -24861,7 +24861,7 @@ } }, { - "accuracy": 0.9611620157957077, + "accuracy": 0.961151484400034, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -24910,7 +24910,7 @@ } }, { - "accuracy": 0.9748448915779591, + "accuracy": 0.9748354908078909, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -24953,7 +24953,7 @@ } }, { - "accuracy": 0.9771162606775761, + "accuracy": 0.9771051350980997, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -24996,7 +24996,7 @@ } }, { - "accuracy": 0.975365836173296, + "accuracy": 0.9753743782639503, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -25048,7 +25048,7 @@ } }, { - "accuracy": 0.9786117561161518, + "accuracy": 0.9786114767193794, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -25100,7 +25100,7 @@ } }, { - "accuracy": 0.9873783187940717, + "accuracy": 0.9873899882659316, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -25152,7 +25152,7 @@ } }, { - "accuracy": 0.9893058259040117, + "accuracy": 0.9893016312271357, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -25204,7 +25204,7 @@ } }, { - "accuracy": 0.9930427521467209, + "accuracy": 0.993039789609611, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -25247,7 +25247,7 @@ } }, { - "accuracy": 0.9935374888591468, + "accuracy": 0.9935355954803526, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -25296,7 +25296,7 @@ } }, { - "accuracy": 0.9945738040842116, + "accuracy": 0.9945709388703108, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -25342,7 +25342,7 @@ } }, { - "accuracy": 0.9981157196452841, + "accuracy": 0.9981151465326548, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -25384,7 +25384,7 @@ ], "model.layers.13.self_attn": [ { - "accuracy": 0.914382241666317, + "accuracy": 0.9144919887185097, "total_bits": 30308928, "q_proj": { "group_size": { @@ -25448,7 +25448,7 @@ } }, { - "accuracy": 0.9174628779292107, + "accuracy": 0.9174542501568794, "total_bits": 31455808, "q_proj": { "group_size": { @@ -25512,7 +25512,7 @@ } }, { - "accuracy": 0.9295148551464081, + "accuracy": 0.9295745342969894, "total_bits": 33412832, "q_proj": { "group_size": { @@ -25576,7 +25576,7 @@ } }, { - "accuracy": 0.9491968564689159, + "accuracy": 0.9492183402180672, "total_bits": 37983200, "q_proj": { "group_size": { @@ -25640,7 +25640,7 @@ } }, { - "accuracy": 0.955919798463583, + "accuracy": 0.9559485502541065, "total_bits": 44838176, "q_proj": { "group_size": { @@ -25704,7 +25704,7 @@ } }, { - "accuracy": 0.9567287154495716, + "accuracy": 0.9566921889781952, "total_bits": 44912768, "q_proj": { "group_size": { @@ -25768,7 +25768,7 @@ } }, { - "accuracy": 0.9756081849336624, + "accuracy": 0.9756113328039646, "total_bits": 57355552, "q_proj": { "group_size": { @@ -25820,7 +25820,7 @@ } }, { - "accuracy": 0.9762581083923578, + "accuracy": 0.9762618523091078, "total_bits": 57430144, "q_proj": { "group_size": { @@ -25872,7 +25872,7 @@ } }, { - "accuracy": 0.9783238749951124, + "accuracy": 0.9782921988517046, "total_bits": 57950464, "q_proj": { "group_size": { @@ -25924,7 +25924,7 @@ } }, { - "accuracy": 0.9795089289546013, + "accuracy": 0.9794944487512112, "total_bits": 58692736, "q_proj": { "group_size": { @@ -25976,7 +25976,7 @@ } }, { - "accuracy": 0.9779392145574093, + "accuracy": 0.9778795074671507, "total_bits": 59068544, "q_proj": { "group_size": { @@ -26040,7 +26040,7 @@ } }, { - "accuracy": 0.9795319139957428, + "accuracy": 0.979520695284009, "total_bits": 59588864, "q_proj": { "group_size": { @@ -26104,7 +26104,7 @@ } }, { - "accuracy": 0.9819448851048946, + "accuracy": 0.9819323495030403, "total_bits": 61536832, "q_proj": { "group_size": { @@ -26165,7 +26165,7 @@ } }, { - "accuracy": 0.9835209306329489, + "accuracy": 0.9835347458720207, "total_bits": 62354560, "q_proj": { "group_size": { @@ -26226,7 +26226,7 @@ } }, { - "accuracy": 0.9901209101080894, + "accuracy": 0.9901253506541252, "total_bits": 75246880, "q_proj": { "group_size": { @@ -26287,7 +26287,7 @@ } }, { - "accuracy": 0.9917429555207491, + "accuracy": 0.9917331263422966, "total_bits": 76510336, "q_proj": { "group_size": { @@ -26348,7 +26348,7 @@ } }, { - "accuracy": 0.9937325660139322, + "accuracy": 0.9937320556491613, "total_bits": 85667104, "q_proj": { "group_size": { @@ -26400,7 +26400,7 @@ } }, { - "accuracy": 0.9958382518962026, + "accuracy": 0.9958483558148146, "total_bits": 91722880, "q_proj": { "group_size": { @@ -26452,7 +26452,7 @@ } }, { - "accuracy": 0.9983558729290962, + "accuracy": 0.9983553681522608, "total_bits": 113978656, "q_proj": { "group_size": { @@ -26506,7 +26506,7 @@ ], "model.layers.13.mlp": [ { - "accuracy": 0.9058817103505135, + "accuracy": 0.9057564288377762, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -26558,7 +26558,7 @@ } }, { - "accuracy": 0.9087112993001938, + "accuracy": 0.9086236134171486, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -26610,7 +26610,7 @@ } }, { - "accuracy": 0.9195607155561447, + "accuracy": 0.9196574911475182, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -26659,7 +26659,7 @@ } }, { - "accuracy": 0.923167884349823, + "accuracy": 0.9232490658760071, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -26708,7 +26708,7 @@ } }, { - "accuracy": 0.9515799544751644, + "accuracy": 0.9516337104141712, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -26760,7 +26760,7 @@ } }, { - "accuracy": 0.9560265839099884, + "accuracy": 0.9560323432087898, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -26812,7 +26812,7 @@ } }, { - "accuracy": 0.9607695490121841, + "accuracy": 0.9607681259512901, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -26861,7 +26861,7 @@ } }, { - "accuracy": 0.9745559766888618, + "accuracy": 0.9745499193668365, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -26904,7 +26904,7 @@ } }, { - "accuracy": 0.976823752745986, + "accuracy": 0.9768188558518887, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -26947,7 +26947,7 @@ } }, { - "accuracy": 0.9750555753707886, + "accuracy": 0.9750737082213163, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -26999,7 +26999,7 @@ } }, { - "accuracy": 0.978454452008009, + "accuracy": 0.9784559179097414, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -27051,7 +27051,7 @@ } }, { - "accuracy": 0.9872105978429317, + "accuracy": 0.9872140353545547, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -27103,7 +27103,7 @@ } }, { - "accuracy": 0.9891844298690557, + "accuracy": 0.9891853602603078, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -27155,7 +27155,7 @@ } }, { - "accuracy": 0.992884736508131, + "accuracy": 0.9928796184249222, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -27198,7 +27198,7 @@ } }, { - "accuracy": 0.9934014892205596, + "accuracy": 0.9934069863520563, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -27247,7 +27247,7 @@ } }, { - "accuracy": 0.9944165083579719, + "accuracy": 0.9944231859408319, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -27293,7 +27293,7 @@ } }, { - "accuracy": 0.9980151185300201, + "accuracy": 0.9980159213300794, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -27335,7 +27335,7 @@ ], "model.layers.14.self_attn": [ { - "accuracy": 0.9055267497897148, + "accuracy": 0.9052515029907227, "total_bits": 30308928, "q_proj": { "group_size": { @@ -27399,7 +27399,7 @@ } }, { - "accuracy": 0.9082472920417786, + "accuracy": 0.9081130772829056, "total_bits": 31455808, "q_proj": { "group_size": { @@ -27463,7 +27463,7 @@ } }, { - "accuracy": 0.9182290732860565, + "accuracy": 0.9183298647403717, "total_bits": 33412832, "q_proj": { "group_size": { @@ -27527,7 +27527,7 @@ } }, { - "accuracy": 0.9408689215779305, + "accuracy": 0.9409149326384068, "total_bits": 37983200, "q_proj": { "group_size": { @@ -27591,7 +27591,7 @@ } }, { - "accuracy": 0.9510448761284351, + "accuracy": 0.9511469714343548, "total_bits": 44838176, "q_proj": { "group_size": { @@ -27655,7 +27655,7 @@ } }, { - "accuracy": 0.9517546966671944, + "accuracy": 0.9518323987722397, "total_bits": 44912768, "q_proj": { "group_size": { @@ -27719,7 +27719,7 @@ } }, { - "accuracy": 0.9723473638296127, + "accuracy": 0.9723943509161472, "total_bits": 57355552, "q_proj": { "group_size": { @@ -27771,7 +27771,7 @@ } }, { - "accuracy": 0.9730114750564098, + "accuracy": 0.9731061886996031, "total_bits": 57430144, "q_proj": { "group_size": { @@ -27823,7 +27823,7 @@ } }, { - "accuracy": 0.9746960289776325, + "accuracy": 0.9747657887637615, "total_bits": 57950464, "q_proj": { "group_size": { @@ -27875,7 +27875,7 @@ } }, { - "accuracy": 0.9767959043383598, + "accuracy": 0.9768108148127794, "total_bits": 58692736, "q_proj": { "group_size": { @@ -27927,7 +27927,7 @@ } }, { - "accuracy": 0.9753426536917686, + "accuracy": 0.975377295166254, "total_bits": 59068544, "q_proj": { "group_size": { @@ -27991,7 +27991,7 @@ } }, { - "accuracy": 0.9770860001444817, + "accuracy": 0.9770306386053562, "total_bits": 59588864, "q_proj": { "group_size": { @@ -28055,7 +28055,7 @@ } }, { - "accuracy": 0.979099478572607, + "accuracy": 0.9790472611784935, "total_bits": 61536832, "q_proj": { "group_size": { @@ -28116,7 +28116,7 @@ } }, { - "accuracy": 0.9806705601513386, + "accuracy": 0.9806842133402824, "total_bits": 62354560, "q_proj": { "group_size": { @@ -28177,7 +28177,7 @@ } }, { - "accuracy": 0.9885000661015511, + "accuracy": 0.9885049602016807, "total_bits": 75246880, "q_proj": { "group_size": { @@ -28238,7 +28238,7 @@ } }, { - "accuracy": 0.9902753746137023, + "accuracy": 0.9902975112199783, "total_bits": 76510336, "q_proj": { "group_size": { @@ -28299,7 +28299,7 @@ } }, { - "accuracy": 0.992810903582722, + "accuracy": 0.9927993132732809, "total_bits": 85667104, "q_proj": { "group_size": { @@ -28351,7 +28351,7 @@ } }, { - "accuracy": 0.9949225764721632, + "accuracy": 0.994893008377403, "total_bits": 91722880, "q_proj": { "group_size": { @@ -28403,7 +28403,7 @@ } }, { - "accuracy": 0.9981167989317328, + "accuracy": 0.998116486473009, "total_bits": 113978656, "q_proj": { "group_size": { @@ -28457,7 +28457,7 @@ ], "model.layers.14.mlp": [ { - "accuracy": 0.9059914350509644, + "accuracy": 0.90602907538414, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -28509,7 +28509,7 @@ } }, { - "accuracy": 0.9088080897927284, + "accuracy": 0.9087480306625366, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -28561,7 +28561,7 @@ } }, { - "accuracy": 0.9203511402010918, + "accuracy": 0.9203285947442055, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -28610,7 +28610,7 @@ } }, { - "accuracy": 0.9241925030946732, + "accuracy": 0.9241577237844467, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -28659,7 +28659,7 @@ } }, { - "accuracy": 0.9516731016337872, + "accuracy": 0.951668631285429, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -28711,7 +28711,7 @@ } }, { - "accuracy": 0.9560071490705013, + "accuracy": 0.9559860564768314, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -28763,7 +28763,7 @@ } }, { - "accuracy": 0.9610604681074619, + "accuracy": 0.9610610008239746, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -28812,7 +28812,7 @@ } }, { - "accuracy": 0.9745946303009987, + "accuracy": 0.9746211394667625, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -28855,7 +28855,7 @@ } }, { - "accuracy": 0.9768316633999348, + "accuracy": 0.9768539238721132, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -28898,7 +28898,7 @@ } }, { - "accuracy": 0.9751278180629015, + "accuracy": 0.9751336295157671, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -28950,7 +28950,7 @@ } }, { - "accuracy": 0.9784498494118452, + "accuracy": 0.978433957323432, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -29002,7 +29002,7 @@ } }, { - "accuracy": 0.9872479913756251, + "accuracy": 0.9872386232018471, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -29054,7 +29054,7 @@ } }, { - "accuracy": 0.9891970083117485, + "accuracy": 0.98919727653265, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -29106,7 +29106,7 @@ } }, { - "accuracy": 0.992910641245544, + "accuracy": 0.9929077257402241, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -29149,7 +29149,7 @@ } }, { - "accuracy": 0.9934103316627443, + "accuracy": 0.99340810906142, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -29198,7 +29198,7 @@ } }, { - "accuracy": 0.9944956391118467, + "accuracy": 0.9944933266378939, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -29244,7 +29244,7 @@ } }, { - "accuracy": 0.9980454861652106, + "accuracy": 0.9980455068871379, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -29286,7 +29286,7 @@ ], "model.layers.15.self_attn": [ { - "accuracy": 0.8662237524986267, + "accuracy": 0.8660960495471954, "total_bits": 30308928, "q_proj": { "group_size": { @@ -29350,7 +29350,7 @@ } }, { - "accuracy": 0.8720844835042953, + "accuracy": 0.8721434473991394, "total_bits": 31455808, "q_proj": { "group_size": { @@ -29414,7 +29414,7 @@ } }, { - "accuracy": 0.8906018137931824, + "accuracy": 0.8906832113862038, "total_bits": 33412832, "q_proj": { "group_size": { @@ -29478,7 +29478,7 @@ } }, { - "accuracy": 0.922219954431057, + "accuracy": 0.9223601892590523, "total_bits": 37983200, "q_proj": { "group_size": { @@ -29542,7 +29542,7 @@ } }, { - "accuracy": 0.9315391108393669, + "accuracy": 0.9315524771809578, "total_bits": 44838176, "q_proj": { "group_size": { @@ -29606,7 +29606,7 @@ } }, { - "accuracy": 0.9328188821673393, + "accuracy": 0.9328577220439911, "total_bits": 44912768, "q_proj": { "group_size": { @@ -29670,7 +29670,7 @@ } }, { - "accuracy": 0.9618901945650578, + "accuracy": 0.9618603363633156, "total_bits": 57355552, "q_proj": { "group_size": { @@ -29722,7 +29722,7 @@ } }, { - "accuracy": 0.9632286131381989, + "accuracy": 0.9631414413452148, "total_bits": 57430144, "q_proj": { "group_size": { @@ -29774,7 +29774,7 @@ } }, { - "accuracy": 0.9660533033311367, + "accuracy": 0.9660372957587242, "total_bits": 57950464, "q_proj": { "group_size": { @@ -29826,7 +29826,7 @@ } }, { - "accuracy": 0.9675209484994411, + "accuracy": 0.9675589837133884, "total_bits": 58692736, "q_proj": { "group_size": { @@ -29878,7 +29878,7 @@ } }, { - "accuracy": 0.9656884074211121, + "accuracy": 0.9657478630542755, "total_bits": 59068544, "q_proj": { "group_size": { @@ -29942,7 +29942,7 @@ } }, { - "accuracy": 0.9681769870221615, + "accuracy": 0.9681409150362015, "total_bits": 59588864, "q_proj": { "group_size": { @@ -30006,7 +30006,7 @@ } }, { - "accuracy": 0.9722221530973911, + "accuracy": 0.9722285084426403, "total_bits": 61536832, "q_proj": { "group_size": { @@ -30067,7 +30067,7 @@ } }, { - "accuracy": 0.9743126630783081, + "accuracy": 0.9743023552000523, "total_bits": 62354560, "q_proj": { "group_size": { @@ -30128,7 +30128,7 @@ } }, { - "accuracy": 0.9847090318799019, + "accuracy": 0.984696488827467, "total_bits": 75246880, "q_proj": { "group_size": { @@ -30189,7 +30189,7 @@ } }, { - "accuracy": 0.987136579118669, + "accuracy": 0.9871381670236588, "total_bits": 76510336, "q_proj": { "group_size": { @@ -30250,7 +30250,7 @@ } }, { - "accuracy": 0.9900257075205445, + "accuracy": 0.9900241773575544, "total_bits": 85667104, "q_proj": { "group_size": { @@ -30302,7 +30302,7 @@ } }, { - "accuracy": 0.993544745258987, + "accuracy": 0.9935367675498128, "total_bits": 91722880, "q_proj": { "group_size": { @@ -30354,7 +30354,7 @@ } }, { - "accuracy": 0.9974129700567573, + "accuracy": 0.9974108194001019, "total_bits": 113978656, "q_proj": { "group_size": { @@ -30408,7 +30408,7 @@ ], "model.layers.15.mlp": [ { - "accuracy": 0.888176292181015, + "accuracy": 0.8881220296025276, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -30460,7 +30460,7 @@ } }, { - "accuracy": 0.8917654976248741, + "accuracy": 0.8917921856045723, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -30512,7 +30512,7 @@ } }, { - "accuracy": 0.9060942456126213, + "accuracy": 0.9061857014894485, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -30561,7 +30561,7 @@ } }, { - "accuracy": 0.9107229635119438, + "accuracy": 0.9108146727085114, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -30610,7 +30610,7 @@ } }, { - "accuracy": 0.9432618841528893, + "accuracy": 0.9432695247232914, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -30662,7 +30662,7 @@ } }, { - "accuracy": 0.9481836818158627, + "accuracy": 0.9481980800628662, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -30714,7 +30714,7 @@ } }, { - "accuracy": 0.9543357789516449, + "accuracy": 0.9543447978794575, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -30763,7 +30763,7 @@ } }, { - "accuracy": 0.9701154325157404, + "accuracy": 0.9701192807406187, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -30806,7 +30806,7 @@ } }, { - "accuracy": 0.9727210849523544, + "accuracy": 0.9727198947221041, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -30849,7 +30849,7 @@ } }, { - "accuracy": 0.9709262512624264, + "accuracy": 0.9709332976490259, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -30901,7 +30901,7 @@ } }, { - "accuracy": 0.9747244548052549, + "accuracy": 0.9747237414121628, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -30953,7 +30953,7 @@ } }, { - "accuracy": 0.9851207751780748, + "accuracy": 0.9851209698244929, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -31005,7 +31005,7 @@ } }, { - "accuracy": 0.9873405825346708, + "accuracy": 0.9873364437371492, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -31057,7 +31057,7 @@ } }, { - "accuracy": 0.991690007969737, + "accuracy": 0.991686038672924, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -31100,7 +31100,7 @@ } }, { - "accuracy": 0.9923195065930486, + "accuracy": 0.9923256579786539, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -31149,7 +31149,7 @@ } }, { - "accuracy": 0.9936661920510232, + "accuracy": 0.993671263102442, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -31195,7 +31195,7 @@ } }, { - "accuracy": 0.9976954362355173, + "accuracy": 0.9976960332132876, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -31237,7 +31237,7 @@ ], "model.layers.16.self_attn": [ { - "accuracy": 0.8969288393855095, + "accuracy": 0.8970862925052643, "total_bits": 30308928, "q_proj": { "group_size": { @@ -31301,7 +31301,7 @@ } }, { - "accuracy": 0.9022532030940056, + "accuracy": 0.9023451805114746, "total_bits": 31455808, "q_proj": { "group_size": { @@ -31365,7 +31365,7 @@ } }, { - "accuracy": 0.9167034402489662, + "accuracy": 0.9166429415345192, "total_bits": 33412832, "q_proj": { "group_size": { @@ -31429,7 +31429,7 @@ } }, { - "accuracy": 0.9358327612280846, + "accuracy": 0.9357787296175957, "total_bits": 37983200, "q_proj": { "group_size": { @@ -31493,7 +31493,7 @@ } }, { - "accuracy": 0.9472634494304657, + "accuracy": 0.9472717791795731, "total_bits": 44838176, "q_proj": { "group_size": { @@ -31557,7 +31557,7 @@ } }, { - "accuracy": 0.9484786465764046, + "accuracy": 0.9484795853495598, "total_bits": 44912768, "q_proj": { "group_size": { @@ -31621,7 +31621,7 @@ } }, { - "accuracy": 0.9694637320935726, + "accuracy": 0.9694760199636221, "total_bits": 57355552, "q_proj": { "group_size": { @@ -31673,7 +31673,7 @@ } }, { - "accuracy": 0.9708479661494493, + "accuracy": 0.9708113986998796, "total_bits": 57430144, "q_proj": { "group_size": { @@ -31725,7 +31725,7 @@ } }, { - "accuracy": 0.9735263660550117, + "accuracy": 0.9735017456114292, "total_bits": 57950464, "q_proj": { "group_size": { @@ -31777,7 +31777,7 @@ } }, { - "accuracy": 0.9748429581522942, + "accuracy": 0.9747295752167702, "total_bits": 58692736, "q_proj": { "group_size": { @@ -31829,7 +31829,7 @@ } }, { - "accuracy": 0.9736697655171156, + "accuracy": 0.9736849442124367, "total_bits": 59068544, "q_proj": { "group_size": { @@ -31893,7 +31893,7 @@ } }, { - "accuracy": 0.9757199659943581, + "accuracy": 0.9757067002356052, "total_bits": 59588864, "q_proj": { "group_size": { @@ -31957,7 +31957,7 @@ } }, { - "accuracy": 0.9787998888641596, + "accuracy": 0.9787530265748501, "total_bits": 61536832, "q_proj": { "group_size": { @@ -32018,7 +32018,7 @@ } }, { - "accuracy": 0.9805225525051355, + "accuracy": 0.9805027451366186, "total_bits": 62354560, "q_proj": { "group_size": { @@ -32079,7 +32079,7 @@ } }, { - "accuracy": 0.9882316552102566, + "accuracy": 0.9882538206875324, "total_bits": 75246880, "q_proj": { "group_size": { @@ -32140,7 +32140,7 @@ } }, { - "accuracy": 0.9901310745626688, + "accuracy": 0.9901269851252437, "total_bits": 76510336, "q_proj": { "group_size": { @@ -32201,7 +32201,7 @@ } }, { - "accuracy": 0.9920943574979901, + "accuracy": 0.9920847797766328, "total_bits": 85667104, "q_proj": { "group_size": { @@ -32253,7 +32253,7 @@ } }, { - "accuracy": 0.9948356342501938, + "accuracy": 0.9948330130428076, "total_bits": 91722880, "q_proj": { "group_size": { @@ -32305,7 +32305,7 @@ } }, { - "accuracy": 0.9979338457342237, + "accuracy": 0.9979362795129418, "total_bits": 113978656, "q_proj": { "group_size": { @@ -32359,7 +32359,7 @@ ], "model.layers.16.mlp": [ { - "accuracy": 0.8954387977719307, + "accuracy": 0.8952863588929176, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -32411,7 +32411,7 @@ } }, { - "accuracy": 0.8993488028645515, + "accuracy": 0.8992033004760742, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -32463,7 +32463,7 @@ } }, { - "accuracy": 0.9125201851129532, + "accuracy": 0.9123476222157478, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -32512,7 +32512,7 @@ } }, { - "accuracy": 0.916957214474678, + "accuracy": 0.916797399520874, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -32561,7 +32561,7 @@ } }, { - "accuracy": 0.9469495937228203, + "accuracy": 0.9469642452895641, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -32613,7 +32613,7 @@ } }, { - "accuracy": 0.9515363164246082, + "accuracy": 0.951481893658638, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -32665,7 +32665,7 @@ } }, { - "accuracy": 0.9573016315698624, + "accuracy": 0.9572397917509079, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -32714,7 +32714,7 @@ } }, { - "accuracy": 0.97197007201612, + "accuracy": 0.9719776529818773, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -32757,7 +32757,7 @@ } }, { - "accuracy": 0.9743286669254303, + "accuracy": 0.974342368543148, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -32800,7 +32800,7 @@ } }, { - "accuracy": 0.9728555325418711, + "accuracy": 0.9728449210524559, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -32852,7 +32852,7 @@ } }, { - "accuracy": 0.9763589985668659, + "accuracy": 0.9763544015586376, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -32904,7 +32904,7 @@ } }, { - "accuracy": 0.9861350525170565, + "accuracy": 0.9861237006261945, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -32956,7 +32956,7 @@ } }, { - "accuracy": 0.9881947506219149, + "accuracy": 0.9881899319589138, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -33008,7 +33008,7 @@ } }, { - "accuracy": 0.9922279161401093, + "accuracy": 0.9922269973903894, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -33051,7 +33051,7 @@ } }, { - "accuracy": 0.9928275826387107, + "accuracy": 0.9928280608728528, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -33100,7 +33100,7 @@ } }, { - "accuracy": 0.9940253831446171, + "accuracy": 0.9940254967659712, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -33146,7 +33146,7 @@ } }, { - "accuracy": 0.9978487037587911, + "accuracy": 0.9978484844323248, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -33188,7 +33188,7 @@ ], "model.layers.17.self_attn": [ { - "accuracy": 0.8973334357142448, + "accuracy": 0.897542305290699, "total_bits": 30308928, "q_proj": { "group_size": { @@ -33252,7 +33252,7 @@ } }, { - "accuracy": 0.9017208069562912, + "accuracy": 0.9018189832568169, "total_bits": 31455808, "q_proj": { "group_size": { @@ -33316,7 +33316,7 @@ } }, { - "accuracy": 0.9132213443517685, + "accuracy": 0.9131938815116882, "total_bits": 33412832, "q_proj": { "group_size": { @@ -33380,7 +33380,7 @@ } }, { - "accuracy": 0.938893273472786, + "accuracy": 0.9388857409358025, "total_bits": 37983200, "q_proj": { "group_size": { @@ -33444,7 +33444,7 @@ } }, { - "accuracy": 0.9475571848452091, + "accuracy": 0.9475753307342529, "total_bits": 44838176, "q_proj": { "group_size": { @@ -33508,7 +33508,7 @@ } }, { - "accuracy": 0.9484684765338898, + "accuracy": 0.9485311545431614, "total_bits": 44912768, "q_proj": { "group_size": { @@ -33572,7 +33572,7 @@ } }, { - "accuracy": 0.9707033317536116, + "accuracy": 0.970684751868248, "total_bits": 57355552, "q_proj": { "group_size": { @@ -33624,7 +33624,7 @@ } }, { - "accuracy": 0.9716454949229956, + "accuracy": 0.9716228414326906, "total_bits": 57430144, "q_proj": { "group_size": { @@ -33676,7 +33676,7 @@ } }, { - "accuracy": 0.9738726690411568, + "accuracy": 0.973836112767458, "total_bits": 57950464, "q_proj": { "group_size": { @@ -33728,7 +33728,7 @@ } }, { - "accuracy": 0.9751554299145937, + "accuracy": 0.9751142673194408, "total_bits": 58692736, "q_proj": { "group_size": { @@ -33780,7 +33780,7 @@ } }, { - "accuracy": 0.973699688911438, + "accuracy": 0.9736771062016487, "total_bits": 59068544, "q_proj": { "group_size": { @@ -33844,7 +33844,7 @@ } }, { - "accuracy": 0.9755098223686218, + "accuracy": 0.9755203202366829, "total_bits": 59588864, "q_proj": { "group_size": { @@ -33908,7 +33908,7 @@ } }, { - "accuracy": 0.9779850598424673, + "accuracy": 0.9779762793332338, "total_bits": 61536832, "q_proj": { "group_size": { @@ -33969,7 +33969,7 @@ } }, { - "accuracy": 0.9795938245952129, + "accuracy": 0.9795962031930685, "total_bits": 62354560, "q_proj": { "group_size": { @@ -34030,7 +34030,7 @@ } }, { - "accuracy": 0.9879280971363187, + "accuracy": 0.987911774776876, "total_bits": 75246880, "q_proj": { "group_size": { @@ -34091,7 +34091,7 @@ } }, { - "accuracy": 0.989773684181273, + "accuracy": 0.9897674126550555, "total_bits": 76510336, "q_proj": { "group_size": { @@ -34152,7 +34152,7 @@ } }, { - "accuracy": 0.9923741007223725, + "accuracy": 0.9923535925336182, "total_bits": 85667104, "q_proj": { "group_size": { @@ -34204,7 +34204,7 @@ } }, { - "accuracy": 0.9947892669588327, + "accuracy": 0.9947894560173154, "total_bits": 91722880, "q_proj": { "group_size": { @@ -34256,7 +34256,7 @@ } }, { - "accuracy": 0.99802085920237, + "accuracy": 0.998017291072756, "total_bits": 113978656, "q_proj": { "group_size": { @@ -34310,7 +34310,7 @@ ], "model.layers.17.mlp": [ { - "accuracy": 0.8820443674921989, + "accuracy": 0.8820276632905006, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -34362,7 +34362,7 @@ } }, { - "accuracy": 0.8866309821605682, + "accuracy": 0.8865934386849403, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -34414,7 +34414,7 @@ } }, { - "accuracy": 0.9031416177749634, + "accuracy": 0.9032439365983009, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -34463,7 +34463,7 @@ } }, { - "accuracy": 0.9086097851395607, + "accuracy": 0.9086964055895805, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -34512,7 +34512,7 @@ } }, { - "accuracy": 0.9407571367919445, + "accuracy": 0.9407499842345715, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -34564,7 +34564,7 @@ } }, { - "accuracy": 0.9456419833004475, + "accuracy": 0.9456696957349777, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -34616,7 +34616,7 @@ } }, { - "accuracy": 0.9527646824717522, + "accuracy": 0.952761884778738, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -34665,7 +34665,7 @@ } }, { - "accuracy": 0.9684986434876919, + "accuracy": 0.9684955701231956, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -34708,7 +34708,7 @@ } }, { - "accuracy": 0.9712209962308407, + "accuracy": 0.9712157864123583, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -34751,7 +34751,7 @@ } }, { - "accuracy": 0.969695445150137, + "accuracy": 0.969698378816247, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -34803,7 +34803,7 @@ } }, { - "accuracy": 0.973537739366293, + "accuracy": 0.9735384881496429, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -34855,7 +34855,7 @@ } }, { - "accuracy": 0.984518950805068, + "accuracy": 0.9845216348767281, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -34907,7 +34907,7 @@ } }, { - "accuracy": 0.9867853112518787, + "accuracy": 0.9867835436016321, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -34959,7 +34959,7 @@ } }, { - "accuracy": 0.991300848312676, + "accuracy": 0.9913051715120673, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -35002,7 +35002,7 @@ } }, { - "accuracy": 0.9920124607160687, + "accuracy": 0.9920066902413964, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -35051,7 +35051,7 @@ } }, { - "accuracy": 0.9935338972136378, + "accuracy": 0.9935281220823526, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -35097,7 +35097,7 @@ } }, { - "accuracy": 0.9976069342810661, + "accuracy": 0.9976056986488402, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -35139,7 +35139,7 @@ ], "model.layers.18.self_attn": [ { - "accuracy": 0.8961049765348434, + "accuracy": 0.895967960357666, "total_bits": 30308928, "q_proj": { "group_size": { @@ -35203,7 +35203,7 @@ } }, { - "accuracy": 0.9031495451927185, + "accuracy": 0.9031614512205124, "total_bits": 31455808, "q_proj": { "group_size": { @@ -35267,7 +35267,7 @@ } }, { - "accuracy": 0.915522888302803, + "accuracy": 0.9155553206801414, "total_bits": 33412832, "q_proj": { "group_size": { @@ -35331,7 +35331,7 @@ } }, { - "accuracy": 0.9396003894507885, + "accuracy": 0.9396803267300129, "total_bits": 37983200, "q_proj": { "group_size": { @@ -35395,7 +35395,7 @@ } }, { - "accuracy": 0.9481151774525642, + "accuracy": 0.9481409452855587, "total_bits": 44838176, "q_proj": { "group_size": { @@ -35459,7 +35459,7 @@ } }, { - "accuracy": 0.9490873701870441, + "accuracy": 0.9491141587495804, "total_bits": 44912768, "q_proj": { "group_size": { @@ -35523,7 +35523,7 @@ } }, { - "accuracy": 0.9702784214168787, + "accuracy": 0.9702505953609943, "total_bits": 57355552, "q_proj": { "group_size": { @@ -35575,7 +35575,7 @@ } }, { - "accuracy": 0.9712849836796522, + "accuracy": 0.9712676033377647, "total_bits": 57430144, "q_proj": { "group_size": { @@ -35627,7 +35627,7 @@ } }, { - "accuracy": 0.9733225014060736, + "accuracy": 0.9733318723738194, "total_bits": 57950464, "q_proj": { "group_size": { @@ -35679,7 +35679,7 @@ } }, { - "accuracy": 0.9746621809899807, + "accuracy": 0.9746661577373743, "total_bits": 58692736, "q_proj": { "group_size": { @@ -35731,7 +35731,7 @@ } }, { - "accuracy": 0.9740584716200829, + "accuracy": 0.9740777555853128, "total_bits": 59068544, "q_proj": { "group_size": { @@ -35795,7 +35795,7 @@ } }, { - "accuracy": 0.9757704772055149, + "accuracy": 0.975718442350626, "total_bits": 59588864, "q_proj": { "group_size": { @@ -35859,7 +35859,7 @@ } }, { - "accuracy": 0.9784729983657598, + "accuracy": 0.9784317370504141, "total_bits": 61536832, "q_proj": { "group_size": { @@ -35920,7 +35920,7 @@ } }, { - "accuracy": 0.9800199028104544, + "accuracy": 0.9800386726856232, "total_bits": 62354560, "q_proj": { "group_size": { @@ -35981,7 +35981,7 @@ } }, { - "accuracy": 0.9881230108439922, + "accuracy": 0.988131619989872, "total_bits": 75246880, "q_proj": { "group_size": { @@ -36042,7 +36042,7 @@ } }, { - "accuracy": 0.9899277659133077, + "accuracy": 0.9899393441155553, "total_bits": 76510336, "q_proj": { "group_size": { @@ -36103,7 +36103,7 @@ } }, { - "accuracy": 0.9922054400667548, + "accuracy": 0.9921957161277533, "total_bits": 85667104, "q_proj": { "group_size": { @@ -36155,7 +36155,7 @@ } }, { - "accuracy": 0.9948229189030826, + "accuracy": 0.994826287496835, "total_bits": 91722880, "q_proj": { "group_size": { @@ -36207,7 +36207,7 @@ } }, { - "accuracy": 0.9979508460965008, + "accuracy": 0.9979522456414998, "total_bits": 113978656, "q_proj": { "group_size": { @@ -36261,7 +36261,7 @@ ], "model.layers.18.mlp": [ { - "accuracy": 0.8908465802669525, + "accuracy": 0.890760600566864, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -36313,7 +36313,7 @@ } }, { - "accuracy": 0.8948191478848457, + "accuracy": 0.8948594033718109, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -36365,7 +36365,7 @@ } }, { - "accuracy": 0.9099691584706306, + "accuracy": 0.9100265651941299, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -36414,7 +36414,7 @@ } }, { - "accuracy": 0.9150631278753281, + "accuracy": 0.915120542049408, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -36463,7 +36463,7 @@ } }, { - "accuracy": 0.9450795240700245, + "accuracy": 0.9450446665287018, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -36515,7 +36515,7 @@ } }, { - "accuracy": 0.949484571814537, + "accuracy": 0.9494681656360626, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -36567,7 +36567,7 @@ } }, { - "accuracy": 0.9560906924307346, + "accuracy": 0.9560775049030781, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -36616,7 +36616,7 @@ } }, { - "accuracy": 0.9707168936729431, + "accuracy": 0.9707209877669811, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -36659,7 +36659,7 @@ } }, { - "accuracy": 0.9732743687927723, + "accuracy": 0.9732760544866323, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -36702,7 +36702,7 @@ } }, { - "accuracy": 0.9719167854636908, + "accuracy": 0.9719307515770197, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -36754,7 +36754,7 @@ } }, { - "accuracy": 0.9754060637205839, + "accuracy": 0.9753952287137508, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -36806,7 +36806,7 @@ } }, { - "accuracy": 0.9856863981112838, + "accuracy": 0.9856886547058821, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -36858,7 +36858,7 @@ } }, { - "accuracy": 0.9877114491537213, + "accuracy": 0.9877165118232369, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -36910,7 +36910,7 @@ } }, { - "accuracy": 0.9919731765985489, + "accuracy": 0.9919724566861987, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -36953,7 +36953,7 @@ } }, { - "accuracy": 0.9926252705045044, + "accuracy": 0.9926214762963355, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -37002,7 +37002,7 @@ } }, { - "accuracy": 0.9940348407253623, + "accuracy": 0.9940323028713465, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -37048,7 +37048,7 @@ } }, { - "accuracy": 0.9977936870418489, + "accuracy": 0.997793102869764, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -37090,7 +37090,7 @@ ], "model.layers.19.self_attn": [ { - "accuracy": 0.9066938832402229, + "accuracy": 0.9067771062254906, "total_bits": 30308928, "q_proj": { "group_size": { @@ -37154,7 +37154,7 @@ } }, { - "accuracy": 0.9125282913446426, + "accuracy": 0.9125330448150635, "total_bits": 31455808, "q_proj": { "group_size": { @@ -37218,7 +37218,7 @@ } }, { - "accuracy": 0.9224199429154396, + "accuracy": 0.9224216490983963, "total_bits": 33412832, "q_proj": { "group_size": { @@ -37282,7 +37282,7 @@ } }, { - "accuracy": 0.942340325564146, + "accuracy": 0.9422799944877625, "total_bits": 37983200, "q_proj": { "group_size": { @@ -37346,7 +37346,7 @@ } }, { - "accuracy": 0.9528491497039795, + "accuracy": 0.9527867995202541, "total_bits": 44838176, "q_proj": { "group_size": { @@ -37410,7 +37410,7 @@ } }, { - "accuracy": 0.9537361785769463, + "accuracy": 0.9536703340709209, "total_bits": 44912768, "q_proj": { "group_size": { @@ -37474,7 +37474,7 @@ } }, { - "accuracy": 0.9727440439164639, + "accuracy": 0.9727084040641785, "total_bits": 57355552, "q_proj": { "group_size": { @@ -37526,7 +37526,7 @@ } }, { - "accuracy": 0.9736681282520294, + "accuracy": 0.9736226126551628, "total_bits": 57430144, "q_proj": { "group_size": { @@ -37578,7 +37578,7 @@ } }, { - "accuracy": 0.9758165907114744, + "accuracy": 0.9758408833295107, "total_bits": 57950464, "q_proj": { "group_size": { @@ -37630,7 +37630,7 @@ } }, { - "accuracy": 0.9769553057849407, + "accuracy": 0.976946609094739, "total_bits": 58692736, "q_proj": { "group_size": { @@ -37682,7 +37682,7 @@ } }, { - "accuracy": 0.9764603525400162, + "accuracy": 0.9764624573290348, "total_bits": 59068544, "q_proj": { "group_size": { @@ -37746,7 +37746,7 @@ } }, { - "accuracy": 0.9781377725303173, + "accuracy": 0.9780647847801447, "total_bits": 59588864, "q_proj": { "group_size": { @@ -37810,7 +37810,7 @@ } }, { - "accuracy": 0.9803321305662394, + "accuracy": 0.9802794177085161, "total_bits": 61536832, "q_proj": { "group_size": { @@ -37871,7 +37871,7 @@ } }, { - "accuracy": 0.9818277545273304, + "accuracy": 0.9817991387099028, "total_bits": 62354560, "q_proj": { "group_size": { @@ -37932,7 +37932,7 @@ } }, { - "accuracy": 0.9891065321862698, + "accuracy": 0.9891098402440548, "total_bits": 75246880, "q_proj": { "group_size": { @@ -37993,7 +37993,7 @@ } }, { - "accuracy": 0.9908470399677753, + "accuracy": 0.9908483978360891, "total_bits": 76510336, "q_proj": { "group_size": { @@ -38054,7 +38054,7 @@ } }, { - "accuracy": 0.9929368873126805, + "accuracy": 0.9929364663548768, "total_bits": 85667104, "q_proj": { "group_size": { @@ -38106,7 +38106,7 @@ } }, { - "accuracy": 0.9951926521025598, + "accuracy": 0.9951782925054431, "total_bits": 91722880, "q_proj": { "group_size": { @@ -38158,7 +38158,7 @@ } }, { - "accuracy": 0.9981394049245864, + "accuracy": 0.9981401044642553, "total_bits": 113978656, "q_proj": { "group_size": { @@ -38212,7 +38212,7 @@ ], "model.layers.19.mlp": [ { - "accuracy": 0.898843489587307, + "accuracy": 0.8988735228776932, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -38264,7 +38264,7 @@ } }, { - "accuracy": 0.9021168202161789, + "accuracy": 0.9020884037017822, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -38316,7 +38316,7 @@ } }, { - "accuracy": 0.9158534109592438, + "accuracy": 0.9158180356025696, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -38365,7 +38365,7 @@ } }, { - "accuracy": 0.9202790409326553, + "accuracy": 0.9202406033873558, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -38414,7 +38414,7 @@ } }, { - "accuracy": 0.9486076608300209, + "accuracy": 0.9486297108232975, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -38466,7 +38466,7 @@ } }, { - "accuracy": 0.9527660310268402, + "accuracy": 0.9527538195252419, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -38518,7 +38518,7 @@ } }, { - "accuracy": 0.9588682539761066, + "accuracy": 0.9588394649326801, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -38567,7 +38567,7 @@ } }, { - "accuracy": 0.9728159811347723, + "accuracy": 0.9728188179433346, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -38610,7 +38610,7 @@ } }, { - "accuracy": 0.9751972481608391, + "accuracy": 0.9751956835389137, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -38653,7 +38653,7 @@ } }, { - "accuracy": 0.9737109690904617, + "accuracy": 0.9737076684832573, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -38705,7 +38705,7 @@ } }, { - "accuracy": 0.9769691210240126, + "accuracy": 0.976981945335865, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -38757,7 +38757,7 @@ } }, { - "accuracy": 0.986573307774961, + "accuracy": 0.9865698497742414, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -38809,7 +38809,7 @@ } }, { - "accuracy": 0.9884986458346248, + "accuracy": 0.9884996181353927, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -38861,7 +38861,7 @@ } }, { - "accuracy": 0.9925676062703133, + "accuracy": 0.9925639107823372, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -38904,7 +38904,7 @@ } }, { - "accuracy": 0.993094963952899, + "accuracy": 0.993094386998564, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -38953,7 +38953,7 @@ } }, { - "accuracy": 0.9943557516671717, + "accuracy": 0.9943572375923395, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -38999,7 +38999,7 @@ } }, { - "accuracy": 0.9979684855788946, + "accuracy": 0.9979692660272121, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -39041,7 +39041,7 @@ ], "model.layers.20.self_attn": [ { - "accuracy": 0.9254191517829895, + "accuracy": 0.9252974018454552, "total_bits": 30308928, "q_proj": { "group_size": { @@ -39105,7 +39105,7 @@ } }, { - "accuracy": 0.9304513037204742, + "accuracy": 0.9304210692644119, "total_bits": 31455808, "q_proj": { "group_size": { @@ -39169,7 +39169,7 @@ } }, { - "accuracy": 0.937650989741087, + "accuracy": 0.9376390874385834, "total_bits": 33412832, "q_proj": { "group_size": { @@ -39233,7 +39233,7 @@ } }, { - "accuracy": 0.9561277441680431, + "accuracy": 0.956094067543745, "total_bits": 37983200, "q_proj": { "group_size": { @@ -39297,7 +39297,7 @@ } }, { - "accuracy": 0.9629123508930206, + "accuracy": 0.9629477187991142, "total_bits": 44838176, "q_proj": { "group_size": { @@ -39361,7 +39361,7 @@ } }, { - "accuracy": 0.9635394588112831, + "accuracy": 0.9635471068322659, "total_bits": 44912768, "q_proj": { "group_size": { @@ -39425,7 +39425,7 @@ } }, { - "accuracy": 0.9784403182566166, + "accuracy": 0.9784409739077091, "total_bits": 57355552, "q_proj": { "group_size": { @@ -39477,7 +39477,7 @@ } }, { - "accuracy": 0.9791186898946762, + "accuracy": 0.9791207388043404, "total_bits": 57430144, "q_proj": { "group_size": { @@ -39529,7 +39529,7 @@ } }, { - "accuracy": 0.9805207420140505, + "accuracy": 0.9805529490113258, "total_bits": 57950464, "q_proj": { "group_size": { @@ -39581,7 +39581,7 @@ } }, { - "accuracy": 0.981576981022954, + "accuracy": 0.9815756008028984, "total_bits": 58692736, "q_proj": { "group_size": { @@ -39633,7 +39633,7 @@ } }, { - "accuracy": 0.9814564771950245, + "accuracy": 0.981438597664237, "total_bits": 59068544, "q_proj": { "group_size": { @@ -39697,7 +39697,7 @@ } }, { - "accuracy": 0.9826366528868675, + "accuracy": 0.9826178383082151, "total_bits": 59588864, "q_proj": { "group_size": { @@ -39761,7 +39761,7 @@ } }, { - "accuracy": 0.9840204436331987, + "accuracy": 0.9840060472488403, "total_bits": 61536832, "q_proj": { "group_size": { @@ -39822,7 +39822,7 @@ } }, { - "accuracy": 0.9851696165278554, + "accuracy": 0.9851967319846153, "total_bits": 62354560, "q_proj": { "group_size": { @@ -39883,7 +39883,7 @@ } }, { - "accuracy": 0.9912118604406714, + "accuracy": 0.99119614623487, "total_bits": 75246880, "q_proj": { "group_size": { @@ -39944,7 +39944,7 @@ } }, { - "accuracy": 0.9925604872405529, + "accuracy": 0.992552753072232, "total_bits": 76510336, "q_proj": { "group_size": { @@ -40005,7 +40005,7 @@ } }, { - "accuracy": 0.9943439392372966, + "accuracy": 0.9943421361967921, "total_bits": 85667104, "q_proj": { "group_size": { @@ -40057,7 +40057,7 @@ } }, { - "accuracy": 0.9961527525447309, + "accuracy": 0.996142350602895, "total_bits": 91722880, "q_proj": { "group_size": { @@ -40109,7 +40109,7 @@ } }, { - "accuracy": 0.9985169863793999, + "accuracy": 0.9985177288763225, "total_bits": 113978656, "q_proj": { "group_size": { @@ -40163,7 +40163,7 @@ ], "model.layers.20.mlp": [ { - "accuracy": 0.9078842997550964, + "accuracy": 0.9078511148691177, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -40215,7 +40215,7 @@ } }, { - "accuracy": 0.9108857437968254, + "accuracy": 0.9108638241887093, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -40267,7 +40267,7 @@ } }, { - "accuracy": 0.9233747720718384, + "accuracy": 0.9233528077602386, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -40316,7 +40316,7 @@ } }, { - "accuracy": 0.927286222577095, + "accuracy": 0.9272778257727623, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -40365,7 +40365,7 @@ } }, { - "accuracy": 0.9533279724419117, + "accuracy": 0.9533239230513573, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -40417,7 +40417,7 @@ } }, { - "accuracy": 0.9570191614329815, + "accuracy": 0.9570236355066299, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -40469,7 +40469,7 @@ } }, { - "accuracy": 0.9625181145966053, + "accuracy": 0.962521106004715, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -40518,7 +40518,7 @@ } }, { - "accuracy": 0.9753356482833624, + "accuracy": 0.9753350578248501, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -40561,7 +40561,7 @@ } }, { - "accuracy": 0.977478351444006, + "accuracy": 0.9774747285991907, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -40604,7 +40604,7 @@ } }, { - "accuracy": 0.9761522505432367, + "accuracy": 0.9761437401175499, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -40656,7 +40656,7 @@ } }, { - "accuracy": 0.9790751449763775, + "accuracy": 0.9790640287101269, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -40708,7 +40708,7 @@ } }, { - "accuracy": 0.9878394166007638, + "accuracy": 0.9878420624881983, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -40760,7 +40760,7 @@ } }, { - "accuracy": 0.9895561756566167, + "accuracy": 0.9895521271973848, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -40812,7 +40812,7 @@ } }, { - "accuracy": 0.9932784507982433, + "accuracy": 0.993278375826776, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -40855,7 +40855,7 @@ } }, { - "accuracy": 0.9937723437324166, + "accuracy": 0.9937705183401704, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -40904,7 +40904,7 @@ } }, { - "accuracy": 0.9949068506248295, + "accuracy": 0.9949048236012459, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -40950,7 +40950,7 @@ } }, { - "accuracy": 0.9981734347529709, + "accuracy": 0.9981726757250726, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -40992,7 +40992,7 @@ ], "model.layers.21.self_attn": [ { - "accuracy": 0.9116751402616501, + "accuracy": 0.9116267189383507, "total_bits": 30308928, "q_proj": { "group_size": { @@ -41056,7 +41056,7 @@ } }, { - "accuracy": 0.9161897599697113, + "accuracy": 0.9161302745342255, "total_bits": 31455808, "q_proj": { "group_size": { @@ -41120,7 +41120,7 @@ } }, { - "accuracy": 0.9299630001187325, + "accuracy": 0.9298892468214035, "total_bits": 33412832, "q_proj": { "group_size": { @@ -41184,7 +41184,7 @@ } }, { - "accuracy": 0.951121561229229, + "accuracy": 0.951112475246191, "total_bits": 37983200, "q_proj": { "group_size": { @@ -41248,7 +41248,7 @@ } }, { - "accuracy": 0.9554766491055489, + "accuracy": 0.9555105529725552, "total_bits": 44838176, "q_proj": { "group_size": { @@ -41312,7 +41312,7 @@ } }, { - "accuracy": 0.9563397578895092, + "accuracy": 0.9563395045697689, "total_bits": 44912768, "q_proj": { "group_size": { @@ -41376,7 +41376,7 @@ } }, { - "accuracy": 0.9752995986491442, + "accuracy": 0.9753066599369049, "total_bits": 57355552, "q_proj": { "group_size": { @@ -41428,7 +41428,7 @@ } }, { - "accuracy": 0.976159306243062, + "accuracy": 0.9761861730366945, "total_bits": 57430144, "q_proj": { "group_size": { @@ -41480,7 +41480,7 @@ } }, { - "accuracy": 0.9777697827666998, + "accuracy": 0.9777861218899488, "total_bits": 57950464, "q_proj": { "group_size": { @@ -41532,7 +41532,7 @@ } }, { - "accuracy": 0.9786687269806862, + "accuracy": 0.9786933194845915, "total_bits": 58692736, "q_proj": { "group_size": { @@ -41584,7 +41584,7 @@ } }, { - "accuracy": 0.9778354410082102, + "accuracy": 0.9778481628745794, "total_bits": 59068544, "q_proj": { "group_size": { @@ -41648,7 +41648,7 @@ } }, { - "accuracy": 0.9793091975152493, + "accuracy": 0.9793014619499445, "total_bits": 59588864, "q_proj": { "group_size": { @@ -41712,7 +41712,7 @@ } }, { - "accuracy": 0.9823502618819475, + "accuracy": 0.9823220800608397, "total_bits": 61536832, "q_proj": { "group_size": { @@ -41773,7 +41773,7 @@ } }, { - "accuracy": 0.9835798889398575, + "accuracy": 0.9835791904479265, "total_bits": 62354560, "q_proj": { "group_size": { @@ -41834,7 +41834,7 @@ } }, { - "accuracy": 0.9903135802596807, + "accuracy": 0.9903201386332512, "total_bits": 75246880, "q_proj": { "group_size": { @@ -41895,7 +41895,7 @@ } }, { - "accuracy": 0.9917551334947348, + "accuracy": 0.9917648062109947, "total_bits": 76510336, "q_proj": { "group_size": { @@ -41956,7 +41956,7 @@ } }, { - "accuracy": 0.993569896556437, + "accuracy": 0.9935718071646988, "total_bits": 85667104, "q_proj": { "group_size": { @@ -42008,7 +42008,7 @@ } }, { - "accuracy": 0.9958970271982253, + "accuracy": 0.995899414177984, "total_bits": 91722880, "q_proj": { "group_size": { @@ -42060,7 +42060,7 @@ } }, { - "accuracy": 0.9983291744720191, + "accuracy": 0.9983296216232702, "total_bits": 113978656, "q_proj": { "group_size": { @@ -42114,7 +42114,7 @@ ], "model.layers.21.mlp": [ { - "accuracy": 0.9132195189595222, + "accuracy": 0.9132231697440147, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -42166,7 +42166,7 @@ } }, { - "accuracy": 0.9158970862627029, + "accuracy": 0.9158701747655869, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -42218,7 +42218,7 @@ } }, { - "accuracy": 0.9271287769079208, + "accuracy": 0.9270192459225655, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -42267,7 +42267,7 @@ } }, { - "accuracy": 0.9307196065783501, + "accuracy": 0.9306086674332619, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -42316,7 +42316,7 @@ } }, { - "accuracy": 0.955941203981638, + "accuracy": 0.9559643566608429, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -42368,7 +42368,7 @@ } }, { - "accuracy": 0.9593969620764256, + "accuracy": 0.9593847468495369, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -42420,7 +42420,7 @@ } }, { - "accuracy": 0.9644332267343998, + "accuracy": 0.9644359201192856, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -42469,7 +42469,7 @@ } }, { - "accuracy": 0.9767589289695024, + "accuracy": 0.9767424874007702, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -42512,7 +42512,7 @@ } }, { - "accuracy": 0.9787649121135473, + "accuracy": 0.9787469636648893, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -42555,7 +42555,7 @@ } }, { - "accuracy": 0.9774891547858715, + "accuracy": 0.9774995297193527, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -42607,7 +42607,7 @@ } }, { - "accuracy": 0.9802252352237701, + "accuracy": 0.9802324343472719, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -42659,7 +42659,7 @@ } }, { - "accuracy": 0.9885327704250813, + "accuracy": 0.9885260602459311, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -42711,7 +42711,7 @@ } }, { - "accuracy": 0.9901311350986362, + "accuracy": 0.9901291783899069, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -42763,7 +42763,7 @@ } }, { - "accuracy": 0.9936433634720743, + "accuracy": 0.9936425597406924, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -42806,7 +42806,7 @@ } }, { - "accuracy": 0.9941183044575155, + "accuracy": 0.9941207375377417, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -42855,7 +42855,7 @@ } }, { - "accuracy": 0.995133682154119, + "accuracy": 0.9951349799521267, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -42901,7 +42901,7 @@ } }, { - "accuracy": 0.9982258901000023, + "accuracy": 0.9982257036026567, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -42943,7 +42943,7 @@ ], "model.layers.22.self_attn": [ { - "accuracy": 0.8600220680236816, + "accuracy": 0.8604224324226379, "total_bits": 30308928, "q_proj": { "group_size": { @@ -43007,7 +43007,7 @@ } }, { - "accuracy": 0.8723062872886658, + "accuracy": 0.8720817565917969, "total_bits": 31455808, "q_proj": { "group_size": { @@ -43071,7 +43071,7 @@ } }, { - "accuracy": 0.8845950290560722, + "accuracy": 0.8842579200863838, "total_bits": 33412832, "q_proj": { "group_size": { @@ -43135,7 +43135,7 @@ } }, { - "accuracy": 0.9110095426440239, + "accuracy": 0.9107557758688927, "total_bits": 37983200, "q_proj": { "group_size": { @@ -43199,7 +43199,7 @@ } }, { - "accuracy": 0.9318739250302315, + "accuracy": 0.9317922592163086, "total_bits": 44838176, "q_proj": { "group_size": { @@ -43263,7 +43263,7 @@ } }, { - "accuracy": 0.9331164434552193, + "accuracy": 0.933014988899231, "total_bits": 44912768, "q_proj": { "group_size": { @@ -43327,7 +43327,7 @@ } }, { - "accuracy": 0.9576672576367855, + "accuracy": 0.9576846696436405, "total_bits": 57355552, "q_proj": { "group_size": { @@ -43379,7 +43379,7 @@ } }, { - "accuracy": 0.9591643884778023, + "accuracy": 0.9591707922518253, "total_bits": 57430144, "q_proj": { "group_size": { @@ -43431,7 +43431,7 @@ } }, { - "accuracy": 0.9620198979973793, + "accuracy": 0.9620674923062325, "total_bits": 57950464, "q_proj": { "group_size": { @@ -43483,7 +43483,7 @@ } }, { - "accuracy": 0.9642005227506161, + "accuracy": 0.9641453549265862, "total_bits": 58692736, "q_proj": { "group_size": { @@ -43535,7 +43535,7 @@ } }, { - "accuracy": 0.9660037606954575, + "accuracy": 0.9659963771700859, "total_bits": 59068544, "q_proj": { "group_size": { @@ -43599,7 +43599,7 @@ } }, { - "accuracy": 0.968350138515234, + "accuracy": 0.9683596417307854, "total_bits": 59588864, "q_proj": { "group_size": { @@ -43663,7 +43663,7 @@ } }, { - "accuracy": 0.9704683814197779, + "accuracy": 0.970446715131402, "total_bits": 61536832, "q_proj": { "group_size": { @@ -43724,7 +43724,7 @@ } }, { - "accuracy": 0.9727780558168888, + "accuracy": 0.9728066977113485, "total_bits": 62354560, "q_proj": { "group_size": { @@ -43785,7 +43785,7 @@ } }, { - "accuracy": 0.9837654158473015, + "accuracy": 0.9837965164333582, "total_bits": 75246880, "q_proj": { "group_size": { @@ -43846,7 +43846,7 @@ } }, { - "accuracy": 0.9864024138078094, + "accuracy": 0.9864012431353331, "total_bits": 76510336, "q_proj": { "group_size": { @@ -43907,7 +43907,7 @@ } }, { - "accuracy": 0.988879844546318, + "accuracy": 0.9888947391882539, "total_bits": 85667104, "q_proj": { "group_size": { @@ -43959,7 +43959,7 @@ } }, { - "accuracy": 0.9924624958075583, + "accuracy": 0.9924732316285372, "total_bits": 91722880, "q_proj": { "group_size": { @@ -44011,7 +44011,7 @@ } }, { - "accuracy": 0.9970627035945654, + "accuracy": 0.997059682616964, "total_bits": 113978656, "q_proj": { "group_size": { @@ -44065,7 +44065,7 @@ ], "model.layers.22.mlp": [ { - "accuracy": 0.9158392548561096, + "accuracy": 0.9158530607819557, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -44117,7 +44117,7 @@ } }, { - "accuracy": 0.9184657260775566, + "accuracy": 0.9185132682323456, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -44169,7 +44169,7 @@ } }, { - "accuracy": 0.9284176975488663, + "accuracy": 0.9284360781311989, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -44218,7 +44218,7 @@ } }, { - "accuracy": 0.9315355196595192, + "accuracy": 0.9315476194024086, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -44267,7 +44267,7 @@ } }, { - "accuracy": 0.9572760388255119, + "accuracy": 0.9572666324675083, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -44319,7 +44319,7 @@ } }, { - "accuracy": 0.9605343677103519, + "accuracy": 0.9605594612658024, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -44371,7 +44371,7 @@ } }, { - "accuracy": 0.9649710729718208, + "accuracy": 0.9650028869509697, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -44420,7 +44420,7 @@ } }, { - "accuracy": 0.9774835146963596, + "accuracy": 0.9774914421141148, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -44463,7 +44463,7 @@ } }, { - "accuracy": 0.9793780688196421, + "accuracy": 0.9793844223022461, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -44506,7 +44506,7 @@ } }, { - "accuracy": 0.9781624637544155, + "accuracy": 0.9781443998217583, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -44558,7 +44558,7 @@ } }, { - "accuracy": 0.9807386361062527, + "accuracy": 0.9807199090719223, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -44610,7 +44610,7 @@ } }, { - "accuracy": 0.988841949030757, + "accuracy": 0.9888424472883344, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -44662,7 +44662,7 @@ } }, { - "accuracy": 0.9903620686382055, + "accuracy": 0.9903717981651425, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -44714,7 +44714,7 @@ } }, { - "accuracy": 0.9938594889827073, + "accuracy": 0.9938576300628483, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -44757,7 +44757,7 @@ } }, { - "accuracy": 0.9942582775838673, + "accuracy": 0.994258991908282, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -44806,7 +44806,7 @@ } }, { - "accuracy": 0.9951435467228293, + "accuracy": 0.9951435327529907, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -44852,7 +44852,7 @@ } }, { - "accuracy": 0.9983168488834053, + "accuracy": 0.9983171256026253, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -44894,7 +44894,7 @@ ], "model.layers.23.self_attn": [ { - "accuracy": 0.9002235606312752, + "accuracy": 0.9000980034470558, "total_bits": 30308928, "q_proj": { "group_size": { @@ -44958,7 +44958,7 @@ } }, { - "accuracy": 0.9065486118197441, + "accuracy": 0.9066225737333298, "total_bits": 31455808, "q_proj": { "group_size": { @@ -45022,7 +45022,7 @@ } }, { - "accuracy": 0.918041318655014, + "accuracy": 0.9180461913347244, "total_bits": 33412832, "q_proj": { "group_size": { @@ -45086,7 +45086,7 @@ } }, { - "accuracy": 0.9384324066340923, + "accuracy": 0.9384226053953171, "total_bits": 37983200, "q_proj": { "group_size": { @@ -45150,7 +45150,7 @@ } }, { - "accuracy": 0.9503814466297626, + "accuracy": 0.950372826308012, "total_bits": 44838176, "q_proj": { "group_size": { @@ -45214,7 +45214,7 @@ } }, { - "accuracy": 0.9512348845601082, + "accuracy": 0.9512467607855797, "total_bits": 44912768, "q_proj": { "group_size": { @@ -45278,7 +45278,7 @@ } }, { - "accuracy": 0.970819503068924, + "accuracy": 0.9708252772688866, "total_bits": 57355552, "q_proj": { "group_size": { @@ -45330,7 +45330,7 @@ } }, { - "accuracy": 0.9717874713242054, + "accuracy": 0.9717806540429592, "total_bits": 57430144, "q_proj": { "group_size": { @@ -45382,7 +45382,7 @@ } }, { - "accuracy": 0.9738185089081526, + "accuracy": 0.9737685434520245, "total_bits": 57950464, "q_proj": { "group_size": { @@ -45434,7 +45434,7 @@ } }, { - "accuracy": 0.9751602187752724, + "accuracy": 0.9751146398484707, "total_bits": 58692736, "q_proj": { "group_size": { @@ -45486,7 +45486,7 @@ } }, { - "accuracy": 0.9751874227076769, + "accuracy": 0.9751594942063093, "total_bits": 59068544, "q_proj": { "group_size": { @@ -45550,7 +45550,7 @@ } }, { - "accuracy": 0.9768790081143379, + "accuracy": 0.9768443964421749, "total_bits": 59588864, "q_proj": { "group_size": { @@ -45614,7 +45614,7 @@ } }, { - "accuracy": 0.9792286064475775, + "accuracy": 0.9791899565607309, "total_bits": 61536832, "q_proj": { "group_size": { @@ -45675,7 +45675,7 @@ } }, { - "accuracy": 0.9807712472975254, + "accuracy": 0.9807248823344707, "total_bits": 62354560, "q_proj": { "group_size": { @@ -45736,7 +45736,7 @@ } }, { - "accuracy": 0.9885837156325579, + "accuracy": 0.98859893810004, "total_bits": 75246880, "q_proj": { "group_size": { @@ -45797,7 +45797,7 @@ } }, { - "accuracy": 0.9903419725596905, + "accuracy": 0.9903517542406917, "total_bits": 76510336, "q_proj": { "group_size": { @@ -45858,7 +45858,7 @@ } }, { - "accuracy": 0.9923813547939062, + "accuracy": 0.9923766339197755, "total_bits": 85667104, "q_proj": { "group_size": { @@ -45910,7 +45910,7 @@ } }, { - "accuracy": 0.9948961278423667, + "accuracy": 0.9948966847732663, "total_bits": 91722880, "q_proj": { "group_size": { @@ -45962,7 +45962,7 @@ } }, { - "accuracy": 0.9980002944357693, + "accuracy": 0.9980048832949251, "total_bits": 113978656, "q_proj": { "group_size": { @@ -46016,7 +46016,7 @@ ], "model.layers.23.mlp": [ { - "accuracy": 0.9183400496840477, + "accuracy": 0.9182481989264488, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -46068,7 +46068,7 @@ } }, { - "accuracy": 0.9207602441310883, + "accuracy": 0.9207983389496803, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -46120,7 +46120,7 @@ } }, { - "accuracy": 0.9304101318120956, + "accuracy": 0.9304468557238579, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -46169,7 +46169,7 @@ } }, { - "accuracy": 0.9335396587848663, + "accuracy": 0.9335753992199898, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -46218,7 +46218,7 @@ } }, { - "accuracy": 0.95809031650424, + "accuracy": 0.9580657295882702, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -46270,7 +46270,7 @@ } }, { - "accuracy": 0.9615879841148853, + "accuracy": 0.9615712836384773, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -46322,7 +46322,7 @@ } }, { - "accuracy": 0.9659944400191307, + "accuracy": 0.9659555852413177, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -46371,7 +46371,7 @@ } }, { - "accuracy": 0.9778957311064005, + "accuracy": 0.9778776671737432, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -46414,7 +46414,7 @@ } }, { - "accuracy": 0.9797942880541086, + "accuracy": 0.9797797352075577, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -46457,7 +46457,7 @@ } }, { - "accuracy": 0.9784904830157757, + "accuracy": 0.9784805551171303, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -46509,7 +46509,7 @@ } }, { - "accuracy": 0.9812441393733025, + "accuracy": 0.9812456294894218, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -46561,7 +46561,7 @@ } }, { - "accuracy": 0.9889877140522003, + "accuracy": 0.9889850988984108, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -46613,7 +46613,7 @@ } }, { - "accuracy": 0.9906198102980852, + "accuracy": 0.9906222112476826, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -46665,7 +46665,7 @@ } }, { - "accuracy": 0.9938806621357799, + "accuracy": 0.9938766132108867, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -46708,7 +46708,7 @@ } }, { - "accuracy": 0.9943293896503747, + "accuracy": 0.9943245043978095, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -46757,7 +46757,7 @@ } }, { - "accuracy": 0.9952116911299527, + "accuracy": 0.9952051844447851, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -46803,7 +46803,7 @@ } }, { - "accuracy": 0.9983016916085035, + "accuracy": 0.9983023661188781, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -46845,7 +46845,7 @@ ], "model.layers.24.self_attn": [ { - "accuracy": 0.9000411629676819, + "accuracy": 0.9000758603215218, "total_bits": 30308928, "q_proj": { "group_size": { @@ -46909,7 +46909,7 @@ } }, { - "accuracy": 0.9092481806874275, + "accuracy": 0.9089899435639381, "total_bits": 31455808, "q_proj": { "group_size": { @@ -46973,7 +46973,7 @@ } }, { - "accuracy": 0.9228631407022476, + "accuracy": 0.9227519929409027, "total_bits": 33412832, "q_proj": { "group_size": { @@ -47037,7 +47037,7 @@ } }, { - "accuracy": 0.9451075606048107, + "accuracy": 0.9450107850134373, "total_bits": 37983200, "q_proj": { "group_size": { @@ -47101,7 +47101,7 @@ } }, { - "accuracy": 0.9511439241468906, + "accuracy": 0.9511034078896046, "total_bits": 44838176, "q_proj": { "group_size": { @@ -47165,7 +47165,7 @@ } }, { - "accuracy": 0.9521462395787239, + "accuracy": 0.9521391801536083, "total_bits": 44912768, "q_proj": { "group_size": { @@ -47229,7 +47229,7 @@ } }, { - "accuracy": 0.9704149011522532, + "accuracy": 0.9703994449228048, "total_bits": 57355552, "q_proj": { "group_size": { @@ -47281,7 +47281,7 @@ } }, { - "accuracy": 0.9715412594377995, + "accuracy": 0.9715345222502947, "total_bits": 57430144, "q_proj": { "group_size": { @@ -47333,7 +47333,7 @@ } }, { - "accuracy": 0.9736700374633074, + "accuracy": 0.9736871141940355, "total_bits": 57950464, "q_proj": { "group_size": { @@ -47385,7 +47385,7 @@ } }, { - "accuracy": 0.9749877713620663, + "accuracy": 0.9749988541007042, "total_bits": 58692736, "q_proj": { "group_size": { @@ -47437,7 +47437,7 @@ } }, { - "accuracy": 0.9757188335061073, + "accuracy": 0.9757179487496614, "total_bits": 59068544, "q_proj": { "group_size": { @@ -47501,7 +47501,7 @@ } }, { - "accuracy": 0.9774507116526365, + "accuracy": 0.9774703308939934, "total_bits": 59588864, "q_proj": { "group_size": { @@ -47565,7 +47565,7 @@ } }, { - "accuracy": 0.9802383426576853, + "accuracy": 0.9802435729652643, "total_bits": 61536832, "q_proj": { "group_size": { @@ -47626,7 +47626,7 @@ } }, { - "accuracy": 0.9814821984618902, + "accuracy": 0.9814784899353981, "total_bits": 62354560, "q_proj": { "group_size": { @@ -47687,7 +47687,7 @@ } }, { - "accuracy": 0.9890236798673868, + "accuracy": 0.9890194535255432, "total_bits": 75246880, "q_proj": { "group_size": { @@ -47748,7 +47748,7 @@ } }, { - "accuracy": 0.9906842615455389, + "accuracy": 0.9906802931800485, "total_bits": 76510336, "q_proj": { "group_size": { @@ -47809,7 +47809,7 @@ } }, { - "accuracy": 0.9923012899234891, + "accuracy": 0.992301972117275, "total_bits": 85667104, "q_proj": { "group_size": { @@ -47861,7 +47861,7 @@ } }, { - "accuracy": 0.9948786222375929, + "accuracy": 0.9948805985040963, "total_bits": 91722880, "q_proj": { "group_size": { @@ -47913,7 +47913,7 @@ } }, { - "accuracy": 0.997995373327285, + "accuracy": 0.9979949889238924, "total_bits": 113978656, "q_proj": { "group_size": { @@ -47967,7 +47967,7 @@ ], "model.layers.24.mlp": [ { - "accuracy": 0.92559964209795, + "accuracy": 0.9256472215056419, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -48019,7 +48019,7 @@ } }, { - "accuracy": 0.9278130382299423, + "accuracy": 0.9277902320027351, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -48071,7 +48071,7 @@ } }, { - "accuracy": 0.9358445852994919, + "accuracy": 0.9358237609267235, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -48120,7 +48120,7 @@ } }, { - "accuracy": 0.9386767745018005, + "accuracy": 0.9386411532759666, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -48169,7 +48169,7 @@ } }, { - "accuracy": 0.9618233405053616, + "accuracy": 0.9618184603750706, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -48221,7 +48221,7 @@ } }, { - "accuracy": 0.9650960303843021, + "accuracy": 0.9650743305683136, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -48273,7 +48273,7 @@ } }, { - "accuracy": 0.9686915799975395, + "accuracy": 0.9686741307377815, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -48322,7 +48322,7 @@ } }, { - "accuracy": 0.9795693270862103, + "accuracy": 0.9795593917369843, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -48365,7 +48365,7 @@ } }, { - "accuracy": 0.9815245009958744, + "accuracy": 0.9815181270241737, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -48408,7 +48408,7 @@ } }, { - "accuracy": 0.9804772697389126, + "accuracy": 0.9804782513529062, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -48460,7 +48460,7 @@ } }, { - "accuracy": 0.982961056753993, + "accuracy": 0.9829648770391941, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -48512,7 +48512,7 @@ } }, { - "accuracy": 0.9900317844003439, + "accuracy": 0.9900405630469322, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -48564,7 +48564,7 @@ } }, { - "accuracy": 0.9914947487413883, + "accuracy": 0.9914911538362503, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -48616,7 +48616,7 @@ } }, { - "accuracy": 0.994380182120949, + "accuracy": 0.9943781793117523, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -48659,7 +48659,7 @@ } }, { - "accuracy": 0.9948879033327103, + "accuracy": 0.9948839037679136, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -48708,7 +48708,7 @@ } }, { - "accuracy": 0.9956484977155924, + "accuracy": 0.9956428701989353, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -48754,7 +48754,7 @@ } }, { - "accuracy": 0.9983874682802707, + "accuracy": 0.9983870283467695, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -48796,7 +48796,7 @@ ], "model.layers.25.self_attn": [ { - "accuracy": 0.9237724244594574, + "accuracy": 0.923765018582344, "total_bits": 30308928, "q_proj": { "group_size": { @@ -48860,7 +48860,7 @@ } }, { - "accuracy": 0.9273674637079239, + "accuracy": 0.9273531660437584, "total_bits": 31455808, "q_proj": { "group_size": { @@ -48924,7 +48924,7 @@ } }, { - "accuracy": 0.9392730295658112, + "accuracy": 0.9392255060374737, "total_bits": 33412832, "q_proj": { "group_size": { @@ -48988,7 +48988,7 @@ } }, { - "accuracy": 0.9596496224403381, + "accuracy": 0.9596178978681564, "total_bits": 37983200, "q_proj": { "group_size": { @@ -49052,7 +49052,7 @@ } }, { - "accuracy": 0.9613975808024406, + "accuracy": 0.9613914974033833, "total_bits": 44838176, "q_proj": { "group_size": { @@ -49116,7 +49116,7 @@ } }, { - "accuracy": 0.9620046243071556, + "accuracy": 0.962019756436348, "total_bits": 44912768, "q_proj": { "group_size": { @@ -49180,7 +49180,7 @@ } }, { - "accuracy": 0.9788154270499945, + "accuracy": 0.9787868969142437, "total_bits": 57355552, "q_proj": { "group_size": { @@ -49232,7 +49232,7 @@ } }, { - "accuracy": 0.9793564602732658, + "accuracy": 0.9793211314827204, "total_bits": 57430144, "q_proj": { "group_size": { @@ -49284,7 +49284,7 @@ } }, { - "accuracy": 0.980896295979619, + "accuracy": 0.9808873273432255, "total_bits": 57950464, "q_proj": { "group_size": { @@ -49336,7 +49336,7 @@ } }, { - "accuracy": 0.9818288143724203, + "accuracy": 0.9818263165652752, "total_bits": 58692736, "q_proj": { "group_size": { @@ -49388,7 +49388,7 @@ } }, { - "accuracy": 0.9807142727077007, + "accuracy": 0.9806932359933853, "total_bits": 59068544, "q_proj": { "group_size": { @@ -49452,7 +49452,7 @@ } }, { - "accuracy": 0.9820475969463587, + "accuracy": 0.9820628892630339, "total_bits": 59588864, "q_proj": { "group_size": { @@ -49516,7 +49516,7 @@ } }, { - "accuracy": 0.9845238020643592, + "accuracy": 0.9845330221578479, "total_bits": 61536832, "q_proj": { "group_size": { @@ -49577,7 +49577,7 @@ } }, { - "accuracy": 0.9857827695086598, + "accuracy": 0.9857928091660142, "total_bits": 62354560, "q_proj": { "group_size": { @@ -49638,7 +49638,7 @@ } }, { - "accuracy": 0.9914994724094868, + "accuracy": 0.9915020493790507, "total_bits": 75246880, "q_proj": { "group_size": { @@ -49699,7 +49699,7 @@ } }, { - "accuracy": 0.9928717431612313, + "accuracy": 0.9928759732283652, "total_bits": 76510336, "q_proj": { "group_size": { @@ -49760,7 +49760,7 @@ } }, { - "accuracy": 0.9944902677088976, + "accuracy": 0.9944881168194115, "total_bits": 85667104, "q_proj": { "group_size": { @@ -49812,7 +49812,7 @@ } }, { - "accuracy": 0.9964965777471662, + "accuracy": 0.9964962496887892, "total_bits": 91722880, "q_proj": { "group_size": { @@ -49864,7 +49864,7 @@ } }, { - "accuracy": 0.9985775835812092, + "accuracy": 0.998579174396582, "total_bits": 113978656, "q_proj": { "group_size": { @@ -49918,7 +49918,7 @@ ], "model.layers.25.mlp": [ { - "accuracy": 0.9394260719418526, + "accuracy": 0.9394301772117615, "total_bits": 142969344, "gate_proj": { "group_size": { @@ -49970,7 +49970,7 @@ } }, { - "accuracy": 0.9412130787968636, + "accuracy": 0.9411459490656853, "total_bits": 148277760, "gate_proj": { "group_size": { @@ -50022,7 +50022,7 @@ } }, { - "accuracy": 0.9476935565471649, + "accuracy": 0.9477050378918648, "total_bits": 165462912, "gate_proj": { "group_size": { @@ -50071,7 +50071,7 @@ } }, { - "accuracy": 0.9500809907913208, + "accuracy": 0.9500880539417267, "total_bits": 185590656, "gate_proj": { "group_size": { @@ -50120,7 +50120,7 @@ } }, { - "accuracy": 0.9688832182437181, + "accuracy": 0.96889528632164, "total_bits": 209798784, "gate_proj": { "group_size": { @@ -50172,7 +50172,7 @@ } }, { - "accuracy": 0.9714504927396774, + "accuracy": 0.9714055731892586, "total_bits": 214864128, "gate_proj": { "group_size": { @@ -50224,7 +50224,7 @@ } }, { - "accuracy": 0.9744708389043808, + "accuracy": 0.9744217358529568, "total_bits": 230963904, "gate_proj": { "group_size": { @@ -50273,7 +50273,7 @@ } }, { - "accuracy": 0.9832967892289162, + "accuracy": 0.9833166711032391, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -50316,7 +50316,7 @@ } }, { - "accuracy": 0.9849021304398775, + "accuracy": 0.9849346242845058, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -50359,7 +50359,7 @@ } }, { - "accuracy": 0.9840464647859335, + "accuracy": 0.9840444251894951, "total_bits": 272393856, "gate_proj": { "group_size": { @@ -50411,7 +50411,7 @@ } }, { - "accuracy": 0.9860300803557038, + "accuracy": 0.9860321069136262, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -50463,7 +50463,7 @@ } }, { - "accuracy": 0.9918431174010038, + "accuracy": 0.9918465595692396, "total_bits": 334988928, "gate_proj": { "group_size": { @@ -50515,7 +50515,7 @@ } }, { - "accuracy": 0.9930016789585352, + "accuracy": 0.9929989366792142, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -50567,7 +50567,7 @@ } }, { - "accuracy": 0.9954412854276597, + "accuracy": 0.9954434814862907, "total_bits": 386967168, "gate_proj": { "group_size": { @@ -50610,7 +50610,7 @@ } }, { - "accuracy": 0.9958284390158951, + "accuracy": 0.9958340055309236, "total_bits": 400569408, "gate_proj": { "group_size": { @@ -50659,7 +50659,7 @@ } }, { - "accuracy": 0.9965037242509425, + "accuracy": 0.9965090290643275, "total_bits": 436548672, "gate_proj": { "group_size": { @@ -50705,7 +50705,7 @@ } }, { - "accuracy": 0.9987250744597986, + "accuracy": 0.9987249359255657, "total_bits": 512046144, "gate_proj": { "group_size": { @@ -50750,7 +50750,7 @@ }, "strategy": { "model.layers.0.self_attn": { - "accuracy": 0.9889282267540693, + "accuracy": 0.9889282938092947, "total_bits": 59588864, "q_proj": { "group_size": { @@ -50814,81 +50814,78 @@ } }, "model.layers.0.mlp": { - "accuracy": 0.9745050817728043, - "total_bits": 277459200, + "accuracy": 0.971364825963974, + "total_bits": 267653376, "gate_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, "4": 32 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.1.self_attn": { - "accuracy": 0.9933391944505274, - "total_bits": 85667104, + "accuracy": 0.9890200421214104, + "total_bits": 75246880, "q_proj": { "group_size": { - "6": 128 + "6": 128, + "5": 128 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 128 + "6": 128, + "5": 128 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, @@ -50906,19 +50903,22 @@ }, "o_proj": { "group_size": { - "6": 128 + "6": 128, + "5": 128 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 } }, "model.layers.1.mlp": { - "accuracy": 0.9747709520161152, + "accuracy": 0.9747742302715778, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -50970,12 +50970,12 @@ } }, "model.layers.2.self_attn": { - "accuracy": 0.9874144792556763, - "total_bits": 76510336, + "accuracy": 0.9853210505098104, + "total_bits": 75246880, "q_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128, + "5": 128 }, "bits": [ 6, @@ -50989,8 +50989,8 @@ }, "k_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128, + "5": 128 }, "bits": [ 6, @@ -51004,7 +51004,7 @@ }, "v_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -51016,8 +51016,8 @@ }, "o_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128, + "5": 128 }, "bits": [ 6, @@ -51031,7 +51031,7 @@ } }, "model.layers.2.mlp": { - "accuracy": 0.966707780957222, + "accuracy": 0.9667122215032578, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -51083,38 +51083,44 @@ } }, "model.layers.3.self_attn": { - "accuracy": 0.994609275367111, - "total_bits": 91722880, + "accuracy": 0.9891604781150818, + "total_bits": 76510336, "q_proj": { "group_size": { - "6": 32 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 32 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "8": 32 + "6": 32 }, "bits": [ - 8 + 6 ], "bits_prop": [ 1 @@ -51123,19 +51129,22 @@ }, "o_proj": { "group_size": { - "6": 32 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 } }, "model.layers.3.mlp": { - "accuracy": 0.9798263423144817, + "accuracy": 0.979815537109971, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -51187,11 +51196,11 @@ } }, "model.layers.4.self_attn": { - "accuracy": 0.9939752677455544, - "total_bits": 91722880, + "accuracy": 0.9909781841561198, + "total_bits": 85667104, "q_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -51203,7 +51212,7 @@ }, "k_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -51215,10 +51224,10 @@ }, "v_proj": { "group_size": { - "8": 32 + "6": 128 }, "bits": [ - 8 + 6 ], "bits_prop": [ 1 @@ -51227,7 +51236,7 @@ }, "o_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -51239,7 +51248,7 @@ } }, "model.layers.4.mlp": { - "accuracy": 0.9814701918512583, + "accuracy": 0.9814739804714918, "total_bits": 340054272, "gate_proj": { "group_size": { @@ -51291,12 +51300,12 @@ } }, "model.layers.5.self_attn": { - "accuracy": 0.9886289816349745, - "total_bits": 76510336, + "accuracy": 0.9866563268005848, + "total_bits": 75246880, "q_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128, + "5": 128 }, "bits": [ 6, @@ -51310,8 +51319,8 @@ }, "k_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128, + "5": 128 }, "bits": [ 6, @@ -51325,7 +51334,7 @@ }, "v_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -51337,8 +51346,8 @@ }, "o_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128, + "5": 128 }, "bits": [ 6, @@ -51352,7 +51361,7 @@ } }, "model.layers.5.mlp": { - "accuracy": 0.9695848729461432, + "accuracy": 0.9695756994187832, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -51404,35 +51413,41 @@ } }, "model.layers.6.self_attn": { - "accuracy": 0.9920955216512084, - "total_bits": 85667104, + "accuracy": 0.9893340524286032, + "total_bits": 76510336, "q_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "6": 128 + "6": 32 }, "bits": [ 6 @@ -51444,19 +51459,22 @@ }, "o_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 } }, "model.layers.6.mlp": { - "accuracy": 0.9682779163122177, + "accuracy": 0.9682538993656635, "total_bits": 277459200, "gate_proj": { "group_size": { @@ -51508,41 +51526,35 @@ } }, "model.layers.7.self_attn": { - "accuracy": 0.9881619503721595, - "total_bits": 76510336, + "accuracy": 0.9907346172258258, + "total_bits": 85667104, "q_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -51554,22 +51566,19 @@ }, "o_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 } }, "model.layers.7.mlp": { - "accuracy": 0.9720455892384052, + "accuracy": 0.9720457717776299, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -51612,38 +51621,44 @@ } }, "model.layers.8.self_attn": { - "accuracy": 0.9819746408611536, - "total_bits": 58692736, + "accuracy": 0.9848749991506338, + "total_bits": 62354560, "q_proj": { "group_size": { + "5": 32, "4": 32 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { + "5": 32, "4": 32 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "4": 32 + "5": 32 }, "bits": [ - 4 + 5 ], "bits_prop": [ 1 @@ -51652,71 +51667,65 @@ }, "o_proj": { "group_size": { + "5": 32, "4": 32 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 } }, "model.layers.8.mlp": { - "accuracy": 0.972480921074748, - "total_bits": 277459200, + "accuracy": 0.9705599583685398, + "total_bits": 267653376, "gate_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, "4": 32 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.9.self_attn": { - "accuracy": 0.9902575109153986, + "accuracy": 0.9902483588084579, "total_bits": 75246880, "q_proj": { "group_size": { @@ -51777,90 +51786,87 @@ } }, "model.layers.9.mlp": { - "accuracy": 0.9767951015383005, - "total_bits": 277459200, + "accuracy": 0.975043885409832, + "total_bits": 267653376, "gate_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, "4": 32 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.10.self_attn": { - "accuracy": 0.982187744230032, - "total_bits": 58692736, + "accuracy": 0.9842821806669235, + "total_bits": 61536832, "q_proj": { "group_size": { - "4": 32 + "5": 64, + "4": 64 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "4": 32 + "5": 64, + "4": 64 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "4": 32 + "5": 64 }, "bits": [ - 4 + 5 ], "bits_prop": [ 1 @@ -51869,23 +51875,26 @@ }, "o_proj": { "group_size": { - "4": 32 + "5": 64, + "4": 64 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 } }, "model.layers.10.mlp": { - "accuracy": 0.975079670548439, - "total_bits": 263770272, + "accuracy": 0.9773098323494196, + "total_bits": 267653376, "gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -51910,7 +51919,7 @@ "down_proj": { "group_size": { "8": 32, - "4": 128 + "4": 32 }, "bits": [ 8, @@ -51924,44 +51933,38 @@ } }, "model.layers.11.self_attn": { - "accuracy": 0.9868558822199702, - "total_bits": 62354560, + "accuracy": 0.9834803491830826, + "total_bits": 57950464, "q_proj": { "group_size": { - "5": 32, - "4": 32 + "4": 64 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "5": 32, - "4": 32 + "4": 64 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "5": 32 + "4": 32 }, "bits": [ - 5 + 4 ], "bits_prop": [ 1 @@ -51970,26 +51973,23 @@ }, "o_proj": { "group_size": { - "5": 32, - "4": 32 + "4": 64 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 } }, "model.layers.11.mlp": { - "accuracy": 0.9810750614851713, - "total_bits": 267653376, + "accuracy": 0.9791763704270124, + "total_bits": 263770272, "gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -52014,7 +52014,7 @@ "down_proj": { "group_size": { "8": 32, - "4": 32 + "4": 128 }, "bits": [ 8, @@ -52028,41 +52028,35 @@ } }, "model.layers.12.self_attn": { - "accuracy": 0.9914399096742272, - "total_bits": 76510336, + "accuracy": 0.9938817266374826, + "total_bits": 85667104, "q_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -52074,22 +52068,19 @@ }, "o_proj": { "group_size": { - "6": 32, - "5": 32 + "6": 128 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 } }, "model.layers.12.mlp": { - "accuracy": 0.9771162606775761, + "accuracy": 0.9771051350980997, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -52132,16 +52123,16 @@ } }, "model.layers.13.self_attn": { - "accuracy": 0.9835209306329489, - "total_bits": 62354560, + "accuracy": 0.9917331263422966, + "total_bits": 76510336, "q_proj": { "group_size": { - "5": 32, - "4": 32 + "6": 32, + "5": 32 }, "bits": [ - 5, - 4 + 6, + 5 ], "bits_prop": [ 0.1, @@ -52151,12 +52142,12 @@ }, "k_proj": { "group_size": { - "5": 32, - "4": 32 + "6": 32, + "5": 32 }, "bits": [ - 5, - 4 + 6, + 5 ], "bits_prop": [ 0.1, @@ -52166,10 +52157,10 @@ }, "v_proj": { "group_size": { - "5": 32 + "6": 32 }, "bits": [ - 5 + 6 ], "bits_prop": [ 1 @@ -52178,12 +52169,12 @@ }, "o_proj": { "group_size": { - "5": 32, - "4": 32 + "6": 32, + "5": 32 }, "bits": [ - 5, - 4 + 6, + 5 ], "bits_prop": [ 0.1, @@ -52193,7 +52184,7 @@ } }, "model.layers.13.mlp": { - "accuracy": 0.976823752745986, + "accuracy": 0.9768188558518887, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -52236,35 +52227,41 @@ } }, "model.layers.14.self_attn": { - "accuracy": 0.992810903582722, - "total_bits": 85667104, + "accuracy": 0.9902975112199783, + "total_bits": 76510336, "q_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "6": 128 + "6": 32 }, "bits": [ 6 @@ -52276,23 +52273,26 @@ }, "o_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 } }, "model.layers.14.mlp": { - "accuracy": 0.9745946303009987, - "total_bits": 263770272, + "accuracy": 0.9768539238721132, + "total_bits": 267653376, "gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -52317,7 +52317,7 @@ "down_proj": { "group_size": { "8": 32, - "4": 128 + "4": 32 }, "bits": [ 8, @@ -52331,7 +52331,7 @@ } }, "model.layers.15.self_attn": { - "accuracy": 0.987136579118669, + "accuracy": 0.9871381670236588, "total_bits": 76510336, "q_proj": { "group_size": { @@ -52392,59 +52392,50 @@ } }, "model.layers.15.mlp": { - "accuracy": 0.9747244548052549, - "total_bits": 277459200, + "accuracy": 0.9701192807406187, + "total_bits": 263770272, "gate_proj": { "group_size": { - "5": 32, - "4": 32 + "4": 128 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, - "4": 32 + "4": 128 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.16.self_attn": { - "accuracy": 0.9901310745626688, + "accuracy": 0.9901269851252437, "total_bits": 76510336, "q_proj": { "group_size": { @@ -52505,78 +52496,93 @@ } }, "model.layers.16.mlp": { - "accuracy": 0.97197007201612, - "total_bits": 263770272, + "accuracy": 0.9763544015586376, + "total_bits": 277459200, "gate_proj": { "group_size": { - "4": 128 + "5": 32, + "4": 32 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { + "5": 32, "4": 32 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.25, + 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "4": 128 + "5": 32, + "4": 32 }, "bits": [ 8, + 5, 4 ], "bits_prop": [ 0.05, - 0.95 + 0.1, + 0.85 ], "scale_bits": 4 } }, "model.layers.17.self_attn": { - "accuracy": 0.9923741007223725, - "total_bits": 85667104, + "accuracy": 0.9897674126550555, + "total_bits": 76510336, "q_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "6": 128 + "6": 32 }, "bits": [ 6 @@ -52588,71 +52594,65 @@ }, "o_proj": { "group_size": { - "6": 128 + "6": 32, + "5": 32 }, "bits": [ - 6 + 6, + 5 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 } }, "model.layers.17.mlp": { - "accuracy": 0.973537739366293, - "total_bits": 277459200, + "accuracy": 0.9712157864123583, + "total_bits": 267653376, "gate_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, "4": 32 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.18.self_attn": { - "accuracy": 0.9899277659133077, + "accuracy": 0.9899393441155553, "total_bits": 76510336, "q_proj": { "group_size": { @@ -52713,68 +52713,59 @@ } }, "model.layers.18.mlp": { - "accuracy": 0.9754060637205839, - "total_bits": 277459200, + "accuracy": 0.9732760544866323, + "total_bits": 267653376, "gate_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, "4": 32 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.19.self_attn": { - "accuracy": 0.9908470399677753, - "total_bits": 76510336, + "accuracy": 0.9817991387099028, + "total_bits": 62354560, "q_proj": { "group_size": { - "6": 32, - "5": 32 + "5": 32, + "4": 32 }, "bits": [ - 6, - 5 + 5, + 4 ], "bits_prop": [ 0.1, @@ -52784,12 +52775,12 @@ }, "k_proj": { "group_size": { - "6": 32, - "5": 32 + "5": 32, + "4": 32 }, "bits": [ - 6, - 5 + 5, + 4 ], "bits_prop": [ 0.1, @@ -52799,10 +52790,10 @@ }, "v_proj": { "group_size": { - "6": 32 + "5": 32 }, "bits": [ - 6 + 5 ], "bits_prop": [ 1 @@ -52811,12 +52802,12 @@ }, "o_proj": { "group_size": { - "6": 32, - "5": 32 + "5": 32, + "4": 32 }, "bits": [ - 6, - 5 + 5, + 4 ], "bits_prop": [ 0.1, @@ -52826,7 +52817,7 @@ } }, "model.layers.19.mlp": { - "accuracy": 0.9751972481608391, + "accuracy": 0.9751956835389137, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -52869,44 +52860,38 @@ } }, "model.layers.20.self_attn": { - "accuracy": 0.9851696165278554, - "total_bits": 62354560, + "accuracy": 0.996142350602895, + "total_bits": 91722880, "q_proj": { "group_size": { - "5": 32, - "4": 32 + "6": 32 }, "bits": [ - 5, - 4 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "5": 32, - "4": 32 + "6": 32 }, "bits": [ - 5, - 4 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "5": 32 + "8": 32 }, "bits": [ - 5 + 8 ], "bits_prop": [ 1 @@ -52915,111 +52900,93 @@ }, "o_proj": { "group_size": { - "5": 32, - "4": 32 + "6": 32 }, "bits": [ - 5, - 4 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 } }, "model.layers.20.mlp": { - "accuracy": 0.9790751449763775, - "total_bits": 277459200, + "accuracy": 0.9774747285991907, + "total_bits": 267653376, "gate_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, "4": 32 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.21.self_attn": { - "accuracy": 0.9903135802596807, - "total_bits": 75246880, + "accuracy": 0.995899414177984, + "total_bits": 91722880, "q_proj": { "group_size": { - "6": 128, - "5": 128 + "6": 32 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { - "6": 128, - "5": 128 + "6": 32 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { - "6": 128 + "8": 32 }, "bits": [ - 6 + 8 ], "bits_prop": [ 1 @@ -53028,22 +52995,19 @@ }, "o_proj": { "group_size": { - "6": 128, - "5": 128 + "6": 32 }, "bits": [ - 6, - 5 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 } }, "model.layers.21.mlp": { - "accuracy": 0.9787649121135473, + "accuracy": 0.9787469636648893, "total_bits": 267653376, "gate_proj": { "group_size": { @@ -53086,11 +53050,11 @@ } }, "model.layers.22.self_attn": { - "accuracy": 0.9924624958075583, - "total_bits": 91722880, + "accuracy": 0.9888947391882539, + "total_bits": 85667104, "q_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -53102,7 +53066,7 @@ }, "k_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -53114,10 +53078,10 @@ }, "v_proj": { "group_size": { - "8": 32 + "6": 128 }, "bits": [ - 8 + 6 ], "bits_prop": [ 1 @@ -53126,7 +53090,7 @@ }, "o_proj": { "group_size": { - "6": 32 + "6": 128 }, "bits": [ 6 @@ -53138,50 +53102,59 @@ } }, "model.layers.22.mlp": { - "accuracy": 0.9793780688196421, - "total_bits": 267653376, + "accuracy": 0.9807199090719223, + "total_bits": 277459200, "gate_proj": { "group_size": { + "5": 32, "4": 32 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.1, + 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { + "5": 32, "4": 32 }, "bits": [ + 5, 4 ], "bits_prop": [ - 1 + 0.25, + 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, + "5": 32, "4": 32 }, "bits": [ 8, + 5, 4 ], "bits_prop": [ 0.05, - 0.95 + 0.1, + 0.85 ], "scale_bits": 4 } }, "model.layers.23.self_attn": { - "accuracy": 0.9903419725596905, + "accuracy": 0.9903517542406917, "total_bits": 76510336, "q_proj": { "group_size": { @@ -53242,7 +53215,7 @@ } }, "model.layers.23.mlp": { - "accuracy": 0.9778957311064005, + "accuracy": 0.9778776671737432, "total_bits": 263770272, "gate_proj": { "group_size": { @@ -53285,7 +53258,7 @@ } }, "model.layers.24.self_attn": { - "accuracy": 0.9906842615455389, + "accuracy": 0.9906802931800485, "total_bits": 76510336, "q_proj": { "group_size": { @@ -53346,64 +53319,55 @@ } }, "model.layers.24.mlp": { - "accuracy": 0.982961056753993, - "total_bits": 277459200, + "accuracy": 0.9815181270241737, + "total_bits": 267653376, "gate_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "5": 32, "4": 32 }, "bits": [ - 5, 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "5": 32, "4": 32 }, "bits": [ 8, - 5, 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } }, "model.layers.25.self_attn": { - "accuracy": 0.9857827695086598, - "total_bits": 62354560, + "accuracy": 0.9845330221578479, + "total_bits": 61536832, "q_proj": { "group_size": { - "5": 32, - "4": 32 + "5": 64, + "4": 64 }, "bits": [ 5, @@ -53417,8 +53381,8 @@ }, "k_proj": { "group_size": { - "5": 32, - "4": 32 + "5": 64, + "4": 64 }, "bits": [ 5, @@ -53432,7 +53396,7 @@ }, "v_proj": { "group_size": { - "5": 32 + "5": 64 }, "bits": [ 5 @@ -53444,8 +53408,8 @@ }, "o_proj": { "group_size": { - "5": 32, - "4": 32 + "5": 64, + "4": 64 }, "bits": [ 5, @@ -53459,56 +53423,48 @@ } }, "model.layers.25.mlp": { - "accuracy": 0.9688832182437181, - "total_bits": 209798784, + "accuracy": 0.9849346242845058, + "total_bits": 267653376, "gate_proj": { "group_size": { - "4": 128, - "3": 128 + "4": 32 }, "bits": [ - 4, - 3 + 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { - "4": 128, - "3": 128 + "4": 32 }, "bits": [ - 4, - 3 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, - "4": 128, - "3": 128 + "4": 32 }, "bits": [ 8, - 4, - 3 + 4 ], "bits_prop": [ 0.05, - 0.1, - 0.85 + 0.95 ], "scale_bits": 4 } } - } + }, + "q_last_module_idx": 54 } \ No newline at end of file